library(knitr)
library(pacman)
p_load(plyr,
tidyverse,
VennDiagram,
pheatmap,
gplots,
gridExtra,
EnhancedVolcano,
venn,
xlsx,
gprofiler2,
corrplot,
PerformanceAnalytics,
Hmisc,
edgeR,
limma,
Glimma,
gplots,
org.Mm.eg.db,
RColorBrewer)
p_load_gh("kassambara/ggpubr")
source("./scripts/TAMPOR.R")
Mayo_Proteomics_TC_traits <- read.csv("./data_prot/Mayo_Proteomics_TC_traits.csv",
sep = ",",
header = TRUE,
stringsAsFactors = FALSE)
Mayo_Proteomics_TC_proteinoutput <- read.table("./data_prot/Mayo_Proteomics_TC_proteinoutput.txt",
sep = "\t",
header = TRUE,
stringsAsFactors = FALSE)
Mayo_Proteomics_TC_proteinoutput has dimensions: 6585, 2105.
Format of the colnames: “LFQ.intensity.mayo_b5_egis_45”
## Fitler out Potential.contaminant and Reverse "+"
dat <- Mayo_Proteomics_TC_proteinoutput %>%
filter(Potential.contaminant != "+") %>%
filter(Reverse != "+")
Now ours data has dimensions: 6335, 2105.
## Select LFQ + egis columns only
dat.m <- dat %>%
dplyr::select(starts_with("LFQ")) %>%
dplyr::select(!contains("egis"))
dim(dat.m)
## [1] 6335 215
There are no missing values in our dataset but plenty of zeroes.
There are 3448027 zeroes.
Traits file:
traits <- Mayo_Proteomics_TC_traits[1:4]
Rownames to satisfy TAMPOR’s conventions
rownames(traits) <- traits$Samples_Simple %>%
gsub("_", "\\.", .)
traits.e <- traits[1:215, ] # egis
traits.m <- traits[c(1:200, 216:230), ] # mgis
Renaming columns to match the traits
dat.mt <- dat.m
## Renaming scheme:
## egis samples: "LFQ.intensity.mayo_b5_egis_24" -> "b5.24"
## non-egis: "LFQ.intensity.mayo_b5_197_41" -> "b5.197"
colnames(dat.mt) <- dat.m %>%
names %>%
gsub("LFQ.intensity.mayo_", "", .) %>%
gsub("mgis_", "", .) %>%
gsub("([0-9]{3})_[0-9]{2}$", "\\1", .) %>% # to strip 2 last digits from non egis samples only
gsub("_", "\\.", .)
To make identical column and row names
dat.mt <- dat.mt[, rownames(traits.m)]
Cleanup
## see: https://github.com/edammer/TAMPOR/issues/2
dat.mt[dat.mt<=0]<- NA
## ## ## Samples designated as GIS were removed before visualization of variance and MDS
## TAMPORlist.MGIS.all.true <- TAMPOR(dat.mt, traits.m,
## noGIS=FALSE,
## useAllNonGIS=TRUE, # This assumes
## # batches are
## # generally
## # trait-balanced
## # and randomized,
## # but GIS and
## # non-GIS samples
## # can be grossly
## # different.
## batchPrefixInSampleNames=TRUE,
## GISchannels=c("01","23", "44", "45"),
## samplesToIgnore = NULL,
## removeGISafter = TRUE,
## parallelThreads=2,
## outputSuffix="MGIS")
## saveRDS(TAMPORlist.MGIS.all.true, "./results_prot/TAMPORlist.MGIS.all.true.rds")
TAMPORlist.MGIS.all.true <- readRDS("./results_prot/TAMPORlist.MGIS.all.true.rds")
## plotMDS(TAMPORlist.GIS$cleanRelAbun %>% log2, col = labels2colors(colnames(TAMPORlist.GIS$cleanRelAbun)))
cra <- TAMPORlist.MGIS.all.true$cleanRelAbun
cra.log <- log2(cra)
conditions.col <- traits$Diagnosis[1:200] %>%
sub("Control", "green", .) %>%
sub("AD", "red", .) %>%
sub("PSP", "blue", .)
traw.log <- log2(dat.mt[1:200])
colnames(traw.log) <- traits[1:200,4] %>% make.unique
tcra.log <- cra.log
colnames(tcra.log) <- traits[1:200,4] %>% make.unique
require(limma)
## opar <- par(no.readonly=TRUE)
png("./figs_prot/mds_tampor_conditions2.png", height = 2000, width = 6000, units = "px")
## tiff("./figs_prot/mds_tampor_conditions2.tiff", height = 2000, width = 6000, units = "px")
par(mfrow = c(1, 2), cex = 5)
plotMDS(traw.log, col = conditions.col)
title("log2 (raw)")
plotMDS(tcra.log, col = conditions.col)
title("After TAMPOR (mgis)")
dev.off()
## png
## 2
## par(opar)
## dev.off()
rm(traw.log, tcra.log)
Figure 2.1: Number of samples: AD = 84, PSP = 85, Control = 31
I used this source making the necessary additions and changes: Data Science Workshop British Society for Proteomic Research Meeting 2018.
Impute the data by replacing the missing values with the median observation for each protein under each condition.
traits <- Mayo_Proteomics_TC_traits %>%
filter(Diagnosis != "EmoryGlobalInternalStandard" &
Diagnosis != "MayoGlobalInternalStandard") %>%
dplyr::select(1:4)
control_cols <- traits %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "Control") %>%
.$n
ad_cols <- traits %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "AD") %>%
.$n
psp_cols <- traits %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "PSP") %>%
.$n
## mean normalisation
## replace_with_mean <- function(x) replace(x, x==0, mean(x))
## median normalisation
replace_with_median <- function(x) replace(x, is.na(x), median(x, na.rm = TRUE))
##
crad <- as.data.frame(cra)
## crad <- as.data.frame(cra[-c(23, 53, 84)])
control <- crad %>%
dplyr::select(all_of(control_cols)) %>%
apply(., 1, replace_with_median) %>%
t %>%
data.frame
ad <- crad %>%
dplyr::select(all_of(ad_cols)) %>%
apply(., 1, replace_with_median) %>%
t %>%
data.frame
psp <- crad %>%
dplyr::select(all_of(psp_cols)) %>%
apply(., 1, replace_with_median) %>%
t %>%
data.frame
data_imp <- cbind.data.frame(control, ad, psp)
# Quantile normalisation : the aim is to give different distributions the
# same statistical properties
quantile_normalisation <- function(df){
# Find rank of values in each column
df_rank <- map_df(df,rank,ties.method="average")
# Sort observations in each column from lowest to highest
df_sorted <- map_df(df,sort)
# Find row mean on sorted columns
df_mean <- rowMeans(df_sorted)
# Function for substiting mean values according to rank
index_to_mean <- function(my_index, my_mean){
return(my_mean[my_index])
}
# Replace value in each column with mean according to rank
df_final <- map_df(df_rank,index_to_mean, my_mean=df_mean)
return(df_final)
}
data_norm <- data_imp %>%
quantile_normalisation()
data_imp %>% ggplot() +
geom_density(aes(b1.001 %>% log2), col = "red") +
geom_density(aes(b2.002 %>% log2), col = "green") +
geom_density(aes(b3.011 %>% log2), col = "blue") +
geom_density(aes(b4.005 %>% log2), col = "magenta") +
geom_density(aes(b5.175 %>% log2)) +
ggtitle("Dstribution before normalization (5 samples from different batches)") +
xlab("log2 Intensity") +
ylab("Density")
data_norm %>% ggplot() +
geom_density(aes(b1.001 %>% log2), col = "red", lwd=2.5) +
geom_density(aes(b2.002 %>% log2), col = "green", lwd=2) +
geom_density(aes(b3.011 %>% log2), col = "blue", lwd = 1.5) +
geom_density(aes(b4.005 %>% log2), col = "magenta", lwd = 1) +
geom_density(aes(b5.175 %>% log2), lwd=0.5) +
ggtitle("Dstribution after normalization (The same 5 samples)") +
xlab("log2 Intensity") +
ylab("Density")
traits <- Mayo_Proteomics_TC_traits %>%
filter(Diagnosis != "EmoryGlobalInternalStandard" &
Diagnosis != "MayoGlobalInternalStandard") %>%
dplyr::select(1:4)
control_cols <- traits %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "Control") %>%
.$n
ad_cols <- traits %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "AD") %>%
.$n
psp_cols <- traits %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "PSP") %>%
.$n
## rearrange columns in cra (optional)
cra <- cra %>%
data.frame %>%
dplyr::select(all_of(c(control_cols, ad_cols, psp_cols)))
diagnosis <- c(rep("Control", length(control_cols)), rep("AD", length(ad_cols)), rep("PSP", length(psp_cols)))
design <- model.matrix( ~ diagnosis + 0)
Build a contrast AD vs Control
AvsC <- makeContrasts(diagnosisAD - diagnosisControl, levels=design)
fit_AvsC <-contrasts.fit(fit, AvsC)
bf_AvsC <- eBayes(fit_AvsC)
bf_AvsC %>% decideTests %>% summary
## diagnosisAD - diagnosisControl
## Down 405
## NotSig 3219
## Up 321
Thus, 405 proteins are significantly downregulated and 321 upregulated in AD versus Control samples (log2 FC cutoff is not applied here).
AvsC_top <- topTable(bf_AvsC, adjust="BH", sort.by = "logFC", number = 1000, p.value = pv_cut, lfc = fc_cut)
write.csv(AvsC_top, "./results_prot/prot_ad_vs_control.csv")
knitr::kable(AvsC_top,
caption = "Top DE proteins: AD vs Control")
| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| CO1A1 | 1.6130 | 27.50 | 4.261 | 0.0000 | 0.0008 | 2.1024 |
| H0YD17 | 1.1071 | 27.56 | 4.353 | 0.0000 | 0.0006 | 2.4561 |
| H0YL34 | 1.0660 | 28.03 | 4.872 | 0.0000 | 0.0001 | 4.5628 |
| K7ES72 | 0.9553 | 26.02 | 3.236 | 0.0014 | 0.0132 | -1.3885 |
| HCN4 | -0.9520 | 29.10 | -3.557 | 0.0005 | 0.0060 | -0.3824 |
| C9J6J8 | 0.9262 | 26.19 | 4.712 | 0.0000 | 0.0002 | 3.8926 |
| B5MCB5 | 0.8631 | 27.33 | 4.321 | 0.0000 | 0.0007 | 2.3334 |
| CO4A | 0.8441 | 29.15 | 5.636 | 0.0000 | 0.0000 | 7.9868 |
| TAU.1 | 0.8439 | 30.91 | 6.673 | 0.0000 | 0.0000 | 13.1880 |
| A6NMN0 | 0.8327 | 29.33 | 7.280 | 0.0000 | 0.0000 | 16.4879 |
| H7C545 | 0.8286 | 29.01 | 6.306 | 0.0000 | 0.0000 | 11.2781 |
| H0Y7G9 | 0.8094 | 26.57 | 6.075 | 0.0000 | 0.0000 | 10.1172 |
| Q05BJ3 | -0.7949 | 30.38 | -4.244 | 0.0000 | 0.0008 | 2.0389 |
| H0YG30 | 0.6852 | 27.10 | 8.127 | 0.0000 | 0.0000 | 21.3489 |
| D6R9C5 | 0.6769 | 26.82 | 3.713 | 0.0003 | 0.0040 | 0.1342 |
| C9JTJ8 | 0.6726 | 27.91 | 5.266 | 0.0000 | 0.0000 | 6.2818 |
| C9IZ15 | -0.6573 | 27.43 | -3.123 | 0.0021 | 0.0167 | -1.7208 |
| K7EKD1 | 0.6552 | 28.46 | 4.238 | 0.0000 | 0.0008 | 2.0156 |
| AB17A | 0.6314 | 23.66 | 5.806 | 0.0000 | 0.0000 | 8.7998 |
| IP3KB | 0.6299 | 25.15 | 5.458 | 0.0000 | 0.0000 | 7.1575 |
| K7EP04 | 0.5829 | 27.43 | 4.140 | 0.0001 | 0.0012 | 1.6486 |
| A0A096LP69 | 0.5665 | 27.33 | 4.074 | 0.0001 | 0.0014 | 1.4060 |
| F5H100 | 0.5464 | 27.74 | 3.188 | 0.0017 | 0.0147 | -1.5287 |
| A0A087WUA0 | 0.5431 | 30.80 | 2.672 | 0.0082 | 0.0454 | -2.9413 |
| PLEC | 0.5368 | 25.82 | 6.567 | 0.0000 | 0.0000 | 12.6298 |
| B0YJC5 | 0.5249 | 34.21 | 4.009 | 0.0001 | 0.0017 | 1.1692 |
| C9J3N8 | 0.5035 | 31.71 | 5.258 | 0.0000 | 0.0000 | 6.2424 |
| KV206 | 0.5021 | 25.12 | 4.628 | 0.0000 | 0.0003 | 3.5468 |
| NPY | 0.4981 | 26.80 | 4.750 | 0.0000 | 0.0002 | 4.0484 |
| H3BV46 | -0.4979 | 24.93 | -4.868 | 0.0000 | 0.0001 | 4.5440 |
| TCPR2 | -0.4911 | 25.44 | -2.932 | 0.0038 | 0.0264 | -2.2598 |
| VAMP3 | 0.4629 | 25.34 | 6.918 | 0.0000 | 0.0000 | 14.5012 |
| NPTX2 | -0.4626 | 25.92 | -3.464 | 0.0007 | 0.0075 | -0.6842 |
| E9PNM5 | -0.4625 | 27.86 | -5.402 | 0.0000 | 0.0000 | 6.8978 |
| H0YFX9 | 0.4623 | 30.41 | 6.542 | 0.0000 | 0.0000 | 12.4984 |
| PGAM2 | 0.4611 | 27.85 | 2.851 | 0.0048 | 0.0315 | -2.4793 |
| C9JH18 | -0.4596 | 33.84 | -2.695 | 0.0076 | 0.0433 | -2.8851 |
| PLCA | -0.4572 | 25.84 | -2.676 | 0.0081 | 0.0451 | -2.9326 |
| Q5TBU2 | 0.4468 | 29.49 | 3.218 | 0.0015 | 0.0137 | -1.4421 |
| VPS39 | 0.4460 | 26.33 | 2.744 | 0.0066 | 0.0391 | -2.7580 |
| H3BP73 | 0.4446 | 27.71 | 4.356 | 0.0000 | 0.0006 | 2.4674 |
| B5MC71 | 0.4195 | 26.82 | 3.560 | 0.0005 | 0.0060 | -0.3742 |
| UBCP1 | -0.4177 | 24.84 | -4.133 | 0.0001 | 0.0012 | 1.6243 |
| F5GXC1 | 0.4143 | 26.25 | 3.030 | 0.0028 | 0.0208 | -1.9854 |
| F5GYV6 | 0.4097 | 32.18 | 3.070 | 0.0024 | 0.0190 | -1.8725 |
| BATF3 | -0.4089 | 27.10 | -4.397 | 0.0000 | 0.0006 | 2.6297 |
| A0A0C4DGW9 | 0.4078 | 26.73 | 4.326 | 0.0000 | 0.0007 | 2.3515 |
| H7C2Q0 | 0.4036 | 26.52 | 4.661 | 0.0000 | 0.0003 | 3.6822 |
| X6RHB9 | 0.4028 | 27.84 | 6.056 | 0.0000 | 0.0000 | 10.0227 |
| KGP2 | 0.4007 | 24.85 | 2.789 | 0.0058 | 0.0361 | -2.6422 |
| KCC2A | 0.3976 | 30.01 | 4.493 | 0.0000 | 0.0004 | 3.0054 |
| CXB6 | -0.3946 | 27.40 | -3.745 | 0.0002 | 0.0036 | 0.2417 |
| H0YDG5 | 0.3935 | 27.68 | 4.709 | 0.0000 | 0.0002 | 3.8814 |
| PIR | 0.3906 | 25.55 | 3.491 | 0.0006 | 0.0071 | -0.5961 |
| ARAID | 0.3882 | 37.13 | 3.937 | 0.0001 | 0.0021 | 0.9114 |
| H7BZ97 | 0.3876 | 24.96 | 4.089 | 0.0001 | 0.0014 | 1.4628 |
| B8ZZQ4 | -0.3876 | 26.83 | -4.476 | 0.0000 | 0.0004 | 2.9390 |
| NDUV3.1 | -0.3875 | 26.80 | -2.841 | 0.0050 | 0.0322 | -2.5053 |
| A0A087WWT2 | -0.3844 | 26.11 | -3.805 | 0.0002 | 0.0031 | 0.4496 |
| E9PQN5 | -0.3836 | 25.91 | -5.923 | 0.0000 | 0.0000 | 9.3660 |
| GTD2A | -0.3802 | 25.47 | -3.478 | 0.0006 | 0.0073 | -0.6368 |
| G5E968 | 0.3754 | 29.60 | 4.617 | 0.0000 | 0.0003 | 3.5039 |
| C9JFK9 | 0.3741 | 28.17 | 3.282 | 0.0012 | 0.0118 | -1.2495 |
| H7C0X8 | 0.3737 | 28.31 | 3.876 | 0.0001 | 0.0026 | 0.6950 |
| B4DM24 | 0.3686 | 32.47 | 2.753 | 0.0064 | 0.0385 | -2.7355 |
| E5RG36CON__P17697 | 0.3671 | 31.65 | 5.746 | 0.0000 | 0.0000 | 8.5113 |
| S4R3A2 | 0.3642 | 29.93 | 4.802 | 0.0000 | 0.0002 | 4.2662 |
| ACTN2 | -0.3618 | 26.14 | -5.284 | 0.0000 | 0.0000 | 6.3627 |
| SYT10 | 0.3604 | 26.36 | 4.514 | 0.0000 | 0.0004 | 3.0875 |
| PADI2 | 0.3594 | 31.34 | 4.298 | 0.0000 | 0.0007 | 2.2458 |
| S4R300 | -0.3569 | 26.47 | -3.531 | 0.0005 | 0.0065 | -0.4681 |
| F8VRJ1 | -0.3564 | 31.25 | -4.168 | 0.0000 | 0.0011 | 1.7529 |
| A0A087X134 | 0.3560 | 32.85 | 5.787 | 0.0000 | 0.0000 | 8.7089 |
| K7EP01 | -0.3559 | 31.59 | -3.550 | 0.0005 | 0.0062 | -0.4069 |
| D6RJA6 | 0.3539 | 29.16 | 4.542 | 0.0000 | 0.0003 | 3.2002 |
| F2Z2Z8 | 0.3518 | 29.65 | 5.157 | 0.0000 | 0.0000 | 5.7956 |
| F8VRU3 | -0.3511 | 29.88 | -4.224 | 0.0000 | 0.0009 | 1.9640 |
| FABP7 | 0.3505 | 26.91 | 2.929 | 0.0038 | 0.0264 | -2.2665 |
| A0A0A0MTN3 | 0.3497 | 32.42 | 4.083 | 0.0001 | 0.0014 | 1.4403 |
| PLXA2 | -0.3492 | 25.44 | -2.911 | 0.0040 | 0.0277 | -2.3176 |
| F8WC57 | -0.3484 | 31.61 | -3.798 | 0.0002 | 0.0032 | 0.4249 |
| F5H2X6 | 0.3467 | 27.11 | 2.844 | 0.0049 | 0.0320 | -2.4974 |
| PYGL | 0.3445 | 24.12 | 3.684 | 0.0003 | 0.0044 | 0.0367 |
| GBG12 | 0.3443 | 29.67 | 4.903 | 0.0000 | 0.0001 | 4.6926 |
| E9PHY3 | 0.3442 | 26.96 | 5.071 | 0.0000 | 0.0001 | 5.4178 |
| K7ERU7 | 0.3366 | 24.53 | 3.626 | 0.0004 | 0.0050 | -0.1573 |
| LRC73 | -0.3340 | 25.27 | -4.153 | 0.0000 | 0.0011 | 1.6964 |
| C9J5D8 | -0.3331 | 25.82 | -4.587 | 0.0000 | 0.0003 | 3.3832 |
| STMN2 | 0.3294 | 30.56 | 3.660 | 0.0003 | 0.0047 | -0.0443 |
| Q5JXI0 | 0.3267 | 32.03 | 5.053 | 0.0000 | 0.0001 | 5.3368 |
| NTNG2 | -0.3264 | 24.18 | -5.037 | 0.0000 | 0.0001 | 5.2669 |
| SNTB1 | 0.3222 | 26.55 | 2.801 | 0.0056 | 0.0353 | -2.6108 |
| H0YJI1 | -0.3220 | 27.91 | -5.805 | 0.0000 | 0.0000 | 8.7935 |
| C2C4C | -0.3213 | 26.59 | -3.312 | 0.0011 | 0.0110 | -1.1586 |
| C9J618 | -0.3196 | 25.17 | -4.312 | 0.0000 | 0.0007 | 2.2999 |
| H3BNP3 | -0.3191 | 25.54 | -3.772 | 0.0002 | 0.0034 | 0.3350 |
| Q5T3N0 | 0.3161 | 29.34 | 2.820 | 0.0053 | 0.0337 | -2.5598 |
| C9JSB2 | 0.3149 | 25.83 | 5.002 | 0.0000 | 0.0001 | 5.1186 |
| CXA1 | 0.3135 | 31.91 | 3.381 | 0.0009 | 0.0092 | -0.9447 |
| NDUA1 | -0.3133 | 27.84 | -3.631 | 0.0004 | 0.0050 | -0.1398 |
| ADCK3 | -0.3113 | 26.75 | -3.547 | 0.0005 | 0.0062 | -0.4161 |
| AAKB2 | 0.3102 | 27.01 | 4.635 | 0.0000 | 0.0003 | 3.5762 |
| SCRG1 | -0.3093 | 24.93 | -3.892 | 0.0001 | 0.0024 | 0.7518 |
| AHNK | 0.3091 | 31.67 | 3.658 | 0.0003 | 0.0047 | -0.0505 |
| C9J6E1 | 0.3081 | 24.32 | 3.431 | 0.0007 | 0.0081 | -0.7864 |
| GBG4 | -0.3058 | 27.05 | -3.309 | 0.0011 | 0.0111 | -1.1665 |
| C9K0I3 | 0.3051 | 24.49 | 3.153 | 0.0019 | 0.0157 | -1.6337 |
| E7ERP7 | 0.3051 | 30.76 | 3.254 | 0.0013 | 0.0127 | -1.3320 |
| E9PMM2 | -0.3047 | 25.37 | -2.897 | 0.0042 | 0.0286 | -2.3559 |
| F5GXR3 | 0.3045 | 28.77 | 4.691 | 0.0000 | 0.0002 | 3.8077 |
| A0A087WYF2 | -0.3044 | 27.08 | -4.070 | 0.0001 | 0.0014 | 1.3906 |
| SMAP | 0.3029 | 27.47 | 3.219 | 0.0015 | 0.0137 | -1.4392 |
| H3BP03 | -0.3028 | 25.54 | -3.908 | 0.0001 | 0.0023 | 0.8078 |
| M0QYX4 | -0.3024 | 30.34 | -4.586 | 0.0000 | 0.0003 | 3.3766 |
Build a contrast PSP vs Control
PvsC <- makeContrasts(diagnosisPSP - diagnosisControl, levels=design)
fit_PvsC <-contrasts.fit(fit, PvsC)
bf_PvsC <- eBayes(fit_PvsC)
bf_PvsC %>% decideTests %>% summary
## diagnosisPSP - diagnosisControl
## Down 346
## NotSig 3078
## Up 521
Thus, 346 proteins are significantly downregulated and 521 upregulated in PSP versus Control samples (log2 FC cutoff is not applied here).
PvsC_top <- topTable(bf_PvsC, adjust="BH", sort.by = "logFC", number = 1000, p.value = pv_cut, lfc = fc_cut)
write.csv(PvsC_top, "./results_prot/prot_psp_vs_control.csv")
knitr::kable(PvsC_top,
caption = "Top DE proteins: PSP vs Control")
| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| C9JNG9 | -1.0261 | 28.78 | -2.655 | 0.0086 | 0.0422 | -2.9069 |
| H0YD17 | -1.0174 | 27.56 | -4.006 | 0.0001 | 0.0019 | 1.2084 |
| F5H5D6 | -0.9964 | 25.49 | -3.786 | 0.0002 | 0.0032 | 0.4361 |
| K7ES72 | -0.9227 | 26.02 | -3.130 | 0.0020 | 0.0155 | -1.6311 |
| K7EKD1 | -0.7262 | 28.46 | -4.704 | 0.0000 | 0.0003 | 3.8881 |
| CO6A1 | -0.7045 | 27.86 | -3.135 | 0.0020 | 0.0155 | -1.6183 |
| C9JH18 | -0.6400 | 33.84 | -3.758 | 0.0002 | 0.0034 | 0.3425 |
| A0A087WWP1 | -0.6125 | 28.23 | -3.655 | 0.0003 | 0.0044 | -0.0024 |
| H7C545 | -0.5931 | 29.01 | -4.520 | 0.0000 | 0.0006 | 3.1474 |
| C9JTJ8 | 0.5914 | 27.91 | 4.638 | 0.0000 | 0.0004 | 3.6184 |
| A0A087WU08 | 0.5771 | 29.40 | 2.672 | 0.0082 | 0.0409 | -2.8654 |
| SYT2 | -0.5672 | 28.24 | -2.906 | 0.0041 | 0.0252 | -2.2572 |
| ARAID | -0.5557 | 37.13 | -5.644 | 0.0000 | 0.0000 | 8.0207 |
| H7C1X2 | -0.5295 | 25.32 | -3.432 | 0.0007 | 0.0079 | -0.7217 |
| S10AA | -0.5180 | 26.09 | -4.281 | 0.0000 | 0.0011 | 2.2207 |
| CART | 0.5100 | 25.08 | 2.685 | 0.0079 | 0.0398 | -2.8338 |
| ANO1 | 0.5086 | 28.94 | 5.873 | 0.0000 | 0.0000 | 9.1119 |
| H12 | -0.5082 | 27.62 | -4.073 | 0.0001 | 0.0017 | 1.4486 |
| ZO2 | -0.5048 | 27.01 | -3.419 | 0.0008 | 0.0081 | -0.7625 |
| D6REL8CON__P02676 | 0.5043 | 29.44 | 2.684 | 0.0079 | 0.0398 | -2.8353 |
| C9JHZ9 | 0.4883 | 28.79 | 4.177 | 0.0000 | 0.0014 | 1.8294 |
| D6R9C5 | 0.4867 | 26.82 | 2.674 | 0.0081 | 0.0408 | -2.8601 |
| K7ESQ2 | -0.4780 | 27.82 | -3.373 | 0.0009 | 0.0091 | -0.9057 |
| SYNPO | 0.4771 | 30.88 | 4.099 | 0.0001 | 0.0016 | 1.5441 |
| OPA3 | 0.4693 | 26.38 | 4.654 | 0.0000 | 0.0004 | 3.6828 |
| E9PR42 | -0.4606 | 27.67 | -6.107 | 0.0000 | 0.0000 | 10.2576 |
| E9PS65 | -0.4592 | 26.09 | -3.186 | 0.0017 | 0.0138 | -1.4695 |
| E9PLK6 | -0.4585 | 30.49 | -6.060 | 0.0000 | 0.0000 | 10.0215 |
| C9JUP6 | -0.4559 | 25.69 | -4.129 | 0.0001 | 0.0015 | 1.6530 |
| H0YLE2 | -0.4421 | 30.81 | -4.008 | 0.0001 | 0.0019 | 1.2153 |
| A0A087X1B9 | -0.4409 | 25.38 | -3.879 | 0.0001 | 0.0026 | 0.7574 |
| C9JA18 | 0.4380 | 29.26 | 4.063 | 0.0001 | 0.0017 | 1.4117 |
| K7EP08 | -0.4373 | 26.26 | -4.142 | 0.0001 | 0.0014 | 1.7022 |
| KCC2A | 0.4341 | 30.01 | 4.913 | 0.0000 | 0.0002 | 4.7561 |
| H0YK70 | -0.4298 | 25.81 | -3.361 | 0.0009 | 0.0094 | -0.9437 |
| A2A2P1 | -0.4144 | 25.08 | -3.379 | 0.0009 | 0.0090 | -0.8863 |
| KV206 | 0.4118 | 25.12 | 3.802 | 0.0002 | 0.0031 | 0.4904 |
| B4DM24 | 0.4104 | 32.47 | 3.071 | 0.0024 | 0.0174 | -1.8014 |
| Q5T3N0 | -0.4099 | 29.34 | -3.662 | 0.0003 | 0.0044 | 0.0209 |
| H1X | -0.4071 | 28.21 | -3.561 | 0.0005 | 0.0056 | -0.3106 |
| STMN2 | 0.4069 | 30.56 | 4.529 | 0.0000 | 0.0006 | 3.1815 |
| KCA10 | -0.4062 | 25.50 | -3.843 | 0.0002 | 0.0029 | 0.6334 |
| CAH4 | 0.4061 | 28.55 | 3.790 | 0.0002 | 0.0032 | 0.4509 |
| A0A087WW91 | 0.4053 | 25.53 | 2.575 | 0.0108 | 0.0493 | -3.1051 |
| CLIC1 | -0.4029 | 26.64 | -3.068 | 0.0025 | 0.0175 | -1.8082 |
| CIRBP | -0.3937 | 27.70 | -3.609 | 0.0004 | 0.0050 | -0.1550 |
| B0YJC5 | -0.3932 | 34.21 | -3.008 | 0.0030 | 0.0201 | -1.9788 |
| F6THM6 | 0.3928 | 31.60 | 3.620 | 0.0004 | 0.0049 | -0.1199 |
| ASAH1 | -0.3879 | 28.57 | -4.835 | 0.0000 | 0.0002 | 4.4283 |
| F5GXC1 | -0.3873 | 26.25 | -2.837 | 0.0050 | 0.0290 | -2.4412 |
| M0QZC9 | -0.3859 | 25.12 | -5.392 | 0.0000 | 0.0000 | 6.8590 |
| Q5SZC5 | 0.3854 | 33.37 | 3.678 | 0.0003 | 0.0042 | 0.0738 |
| E9PBF6 | -0.3852 | 27.65 | -2.848 | 0.0049 | 0.0283 | -2.4135 |
| H7C2E7 | -0.3838 | 30.15 | -3.421 | 0.0008 | 0.0081 | -0.7560 |
| X6RHB9 | 0.3830 | 27.84 | 5.767 | 0.0000 | 0.0000 | 8.6012 |
| RBM3 | -0.3828 | 25.14 | -3.073 | 0.0024 | 0.0174 | -1.7950 |
| E9PFT6 | 0.3805 | 31.34 | 3.186 | 0.0017 | 0.0138 | -1.4696 |
| D6RAF1 | -0.3774 | 24.56 | -3.589 | 0.0004 | 0.0052 | -0.2208 |
| H0YDG5 | 0.3729 | 27.68 | 4.470 | 0.0000 | 0.0006 | 2.9503 |
| H0YHD8 | 0.3727 | 31.42 | 6.349 | 0.0000 | 0.0000 | 11.4729 |
| E9PMM2 | -0.3724 | 25.37 | -3.546 | 0.0005 | 0.0058 | -0.3593 |
| H0YAT3 | -0.3647 | 24.79 | -4.279 | 0.0000 | 0.0011 | 2.2138 |
| C9JFK9 | -0.3611 | 28.17 | -3.172 | 0.0017 | 0.0141 | -1.5085 |
| AHNK | -0.3605 | 31.67 | -4.272 | 0.0000 | 0.0011 | 2.1878 |
| CXA1 | -0.3596 | 31.91 | -3.885 | 0.0001 | 0.0026 | 0.7772 |
| GTD2A | -0.3592 | 25.47 | -3.292 | 0.0012 | 0.0109 | -1.1549 |
| A0A087WWY6 | -0.3587 | 27.43 | -3.912 | 0.0001 | 0.0024 | 0.8734 |
| F5H143 | 0.3582 | 30.58 | 3.488 | 0.0006 | 0.0068 | -0.5460 |
| A0A087WX08 | -0.3572 | 26.05 | -4.964 | 0.0000 | 0.0002 | 4.9733 |
| D6RCD0 | -0.3557 | 25.27 | -4.292 | 0.0000 | 0.0010 | 2.2630 |
| H7C4X7 | 0.3554 | 28.66 | 4.807 | 0.0000 | 0.0002 | 4.3132 |
| A2A2A0 | -0.3545 | 28.30 | -3.255 | 0.0013 | 0.0119 | -1.2647 |
| H7C1L2 | -0.3542 | 31.33 | -2.928 | 0.0038 | 0.0241 | -2.1979 |
| K7EQB2 | -0.3538 | 24.67 | -3.270 | 0.0013 | 0.0116 | -1.2201 |
| G8JLE9 | -0.3528 | 28.42 | -4.610 | 0.0000 | 0.0004 | 3.5075 |
| MYPT2 | 0.3514 | 28.80 | 2.984 | 0.0032 | 0.0213 | -2.0454 |
| C9J4M8 | 0.3511 | 30.80 | 3.266 | 0.0013 | 0.0116 | -1.2321 |
| 1A02 | -0.3508 | 27.12 | -2.755 | 0.0064 | 0.0349 | -2.6560 |
| C9JSB2 | 0.3477 | 25.83 | 5.532 | 0.0000 | 0.0000 | 7.5000 |
| F5GYV6 | 0.3440 | 32.18 | 2.582 | 0.0105 | 0.0489 | -3.0876 |
| RGS14 | 0.3434 | 24.91 | 4.204 | 0.0000 | 0.0013 | 1.9303 |
| Q5QPM2 | -0.3414 | 26.77 | -4.669 | 0.0000 | 0.0004 | 3.7442 |
| GID8 | 0.3405 | 25.50 | 3.384 | 0.0009 | 0.0089 | -0.8710 |
| H3BQC7 | 0.3371 | 29.51 | 3.611 | 0.0004 | 0.0050 | -0.1486 |
| FKBP5 | -0.3369 | 26.03 | -3.881 | 0.0001 | 0.0026 | 0.7653 |
| PADI2 | -0.3368 | 31.34 | -4.035 | 0.0001 | 0.0018 | 1.3109 |
| EHD2 | -0.3356 | 26.90 | -3.462 | 0.0007 | 0.0073 | -0.6289 |
| H7C1N8 | 0.3349 | 29.12 | 4.983 | 0.0000 | 0.0002 | 5.0545 |
| C9J7D8 | -0.3325 | 26.24 | -3.978 | 0.0001 | 0.0020 | 1.1051 |
| H0Y9Q1 | -0.3298 | 24.23 | -2.850 | 0.0048 | 0.0282 | -2.4085 |
| LAMB4 | -0.3288 | 27.81 | -3.641 | 0.0003 | 0.0046 | -0.0503 |
| J3KRM4 | -0.3286 | 33.04 | -4.279 | 0.0000 | 0.0011 | 2.2128 |
| MARF1 | 0.3261 | 25.32 | 5.121 | 0.0000 | 0.0001 | 5.6529 |
| I3L3L3 | -0.3250 | 26.44 | -4.197 | 0.0000 | 0.0013 | 1.9035 |
| PDLI5 | -0.3214 | 27.85 | -4.627 | 0.0000 | 0.0004 | 3.5745 |
| H0YLR3 | -0.3200 | 27.68 | -3.824 | 0.0002 | 0.0030 | 0.5680 |
| E9PS38 | 0.3183 | 28.41 | 3.212 | 0.0015 | 0.0130 | -1.3912 |
| K7EP01 | -0.3179 | 31.59 | -3.176 | 0.0017 | 0.0140 | -1.4993 |
| PVRL1 | 0.3178 | 29.09 | 4.940 | 0.0000 | 0.0002 | 4.8709 |
| PRDX6 | -0.3176 | 33.36 | -4.707 | 0.0000 | 0.0003 | 3.8982 |
| E9PQM0 | -0.3175 | 24.01 | -3.894 | 0.0001 | 0.0025 | 0.8116 |
| A0A087WY99 | -0.3174 | 25.21 | -3.944 | 0.0001 | 0.0022 | 0.9863 |
| SCN4B | -0.3165 | 25.04 | -3.130 | 0.0020 | 0.0155 | -1.6311 |
| H3BN15 | -0.3144 | 25.19 | -3.267 | 0.0013 | 0.0116 | -1.2281 |
| A6PVC3 | -0.3133 | 29.57 | -3.702 | 0.0003 | 0.0040 | 0.1536 |
| H2AZ | -0.3133 | 29.25 | -3.246 | 0.0014 | 0.0122 | -1.2908 |
| E9PL00 | 0.3133 | 29.26 | 4.751 | 0.0000 | 0.0003 | 4.0802 |
| FUBP3 | -0.3130 | 24.66 | -4.854 | 0.0000 | 0.0002 | 4.5070 |
| LY6H | 0.3128 | 30.85 | 3.736 | 0.0002 | 0.0036 | 0.2659 |
| G3XAK4 | -0.3084 | 28.57 | -5.019 | 0.0000 | 0.0002 | 5.2086 |
| PLEC | -0.3056 | 25.82 | -3.745 | 0.0002 | 0.0035 | 0.2975 |
| UBCP1 | -0.3053 | 24.84 | -3.026 | 0.0028 | 0.0193 | -1.9265 |
| RASL1 | 0.3049 | 27.57 | 2.690 | 0.0078 | 0.0395 | -2.8215 |
| SAMH1 | -0.3047 | 26.71 | -3.033 | 0.0027 | 0.0190 | -1.9083 |
| J3QRJ3 | 0.3046 | 33.29 | 5.745 | 0.0000 | 0.0000 | 8.4984 |
| CCKN | 0.3028 | 29.58 | 4.271 | 0.0000 | 0.0011 | 2.1824 |
| PLXA1 | 0.3026 | 29.67 | 3.347 | 0.0010 | 0.0097 | -0.9859 |
| ARHG6 | -0.3023 | 25.12 | -5.735 | 0.0000 | 0.0000 | 8.4503 |
| G3V368 | 0.3016 | 26.07 | 2.766 | 0.0062 | 0.0339 | -2.6274 |
Build a contrast AD vs PSP
AvsP <- makeContrasts(diagnosisAD - diagnosisPSP, levels=design)
fit_AvsP <-contrasts.fit(fit, AvsP)
bf_AvsP <- eBayes(fit_AvsP)
bf_AvsP %>% decideTests %>% summary
## diagnosisAD - diagnosisPSP
## Down 1240
## NotSig 1882
## Up 823
Thus, 1240 proteins are significantly downregulated and 823 upregulated in PSP versus Control samples (log2 FC cutoff is not applied here).
AvsP_top <- topTable(bf_AvsP, adjust="BH", sort.by = "logFC", number = 1000, p.value = pv_cut, lfc = fc_cut)
write.csv(AvsP_top, "./results_prot/prot_ad_vs_psp.csv")
knitr::kable(AvsP_top,
caption = "Top DE proteins: AD vs PSP")
| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| H0YD17 | 2.1245 | 27.56 | 11.409 | 0.0000 | 0.0000 | 42.8256 |
| CO1A1 | 1.9168 | 27.50 | 6.916 | 0.0000 | 0.0000 | 14.4058 |
| K7ES72 | 1.8781 | 26.02 | 8.689 | 0.0000 | 0.0000 | 24.9031 |
| C9JNG9 | 1.8114 | 28.78 | 6.393 | 0.0000 | 0.0000 | 11.5777 |
| F8VNV6 | 1.5912 | 27.33 | 5.489 | 0.0000 | 0.0000 | 7.0452 |
| H7C545 | 1.4217 | 29.01 | 14.777 | 0.0000 | 0.0000 | 66.1769 |
| K7EKD1 | 1.3815 | 28.46 | 12.204 | 0.0000 | 0.0000 | 48.2846 |
| PRELP | 1.2962 | 26.59 | 6.093 | 0.0000 | 0.0000 | 10.0209 |
| F5H5D6 | 1.2512 | 25.49 | 6.483 | 0.0000 | 0.0000 | 12.0564 |
| H0YL34 | 1.1763 | 28.03 | 7.344 | 0.0000 | 0.0000 | 16.8207 |
| Q05BJ3 | -1.1738 | 30.38 | -8.560 | 0.0000 | 0.0000 | 24.1038 |
| CO6A1 | 1.1617 | 27.86 | 7.050 | 0.0000 | 0.0000 | 15.1513 |
| C9J6J8 | 1.1422 | 26.19 | 7.937 | 0.0000 | 0.0000 | 20.2993 |
| GMPR1 | 1.0703 | 25.32 | 5.960 | 0.0000 | 0.0000 | 9.3496 |
| ARAID | 0.9439 | 37.13 | 13.075 | 0.0000 | 0.0000 | 54.3219 |
| C9IZ15 | -0.9310 | 27.43 | -6.042 | 0.0000 | 0.0000 | 9.7635 |
| B1PS43 | 0.9295 | 28.65 | 3.907 | 0.0001 | 0.0005 | 0.4095 |
| B0YJC5 | 0.9181 | 34.21 | 9.578 | 0.0000 | 0.0000 | 30.5775 |
| ZO2 | 0.8855 | 27.01 | 8.180 | 0.0000 | 0.0000 | 21.7680 |
| K7EP04 | 0.8564 | 27.43 | 8.307 | 0.0000 | 0.0000 | 22.5427 |
| TAU.1 | 0.8518 | 30.91 | 9.200 | 0.0000 | 0.0000 | 28.1417 |
| PGAM2 | 0.8487 | 27.85 | 7.167 | 0.0000 | 0.0000 | 15.8141 |
| PLEC | 0.8425 | 25.82 | 14.077 | 0.0000 | 0.0000 | 61.3047 |
| Q5TBF5 | 0.8360 | 25.90 | 4.299 | 0.0000 | 0.0001 | 1.8865 |
| CO4A | 0.8018 | 29.15 | 7.312 | 0.0000 | 0.0000 | 16.6395 |
| H0Y7G9 | 0.8017 | 26.57 | 8.219 | 0.0000 | 0.0000 | 22.0027 |
| F5GXC1 | 0.8016 | 26.25 | 8.009 | 0.0000 | 0.0000 | 20.7307 |
| J3QR93 | 0.7748 | 28.06 | 6.252 | 0.0000 | 0.0000 | 10.8420 |
| IP3KB | 0.7747 | 25.15 | 9.169 | 0.0000 | 0.0000 | 27.9433 |
| A0A087WWP1 | 0.7688 | 28.23 | 6.257 | 0.0000 | 0.0000 | 10.8675 |
| Q5TBU2 | 0.7560 | 29.49 | 7.437 | 0.0000 | 0.0000 | 17.3578 |
| C9JFK9 | 0.7353 | 28.17 | 8.809 | 0.0000 | 0.0000 | 25.6592 |
| Q5T3N0 | 0.7260 | 29.34 | 8.847 | 0.0000 | 0.0000 | 25.8974 |
| H0YLE2 | 0.7234 | 30.81 | 8.945 | 0.0000 | 0.0000 | 26.5163 |
| NPTX2 | -0.7152 | 25.92 | -7.314 | 0.0000 | 0.0000 | 16.6519 |
| C9J3N8 | 0.7096 | 31.71 | 10.120 | 0.0000 | 0.0000 | 34.1377 |
| SYT2 | 0.7024 | 28.24 | 4.908 | 0.0000 | 0.0000 | 4.4046 |
| A6NMN0 | 0.6998 | 29.33 | 8.358 | 0.0000 | 0.0000 | 22.8517 |
| HCN4 | -0.6965 | 29.10 | -3.555 | 0.0005 | 0.0015 | -0.8093 |
| PADI2 | 0.6962 | 31.34 | 11.374 | 0.0000 | 0.0000 | 42.5812 |
| CXA1 | 0.6731 | 31.91 | 9.916 | 0.0000 | 0.0000 | 32.7887 |
| H0YG30 | 0.6717 | 27.10 | 10.882 | 0.0000 | 0.0000 | 39.2411 |
| AHNK | 0.6696 | 31.67 | 10.823 | 0.0000 | 0.0000 | 38.8413 |
| H7C3S1 | 0.6617 | 27.73 | 3.869 | 0.0001 | 0.0005 | 0.2753 |
| CLIC1 | 0.6557 | 26.64 | 6.811 | 0.0000 | 0.0000 | 13.8297 |
| PROD | 0.6507 | 27.04 | 5.189 | 0.0000 | 0.0000 | 5.6520 |
| A0A096LP69 | 0.6400 | 27.33 | 6.287 | 0.0000 | 0.0000 | 11.0236 |
| KGP2 | 0.6400 | 24.85 | 6.084 | 0.0000 | 0.0000 | 9.9779 |
| H0Y796 | 0.6349 | 28.24 | 5.163 | 0.0000 | 0.0000 | 5.5386 |
| H7C2E7 | 0.6235 | 30.15 | 7.581 | 0.0000 | 0.0000 | 18.1916 |
| S10AA | 0.6046 | 26.09 | 6.815 | 0.0000 | 0.0000 | 13.8500 |
| AB17A | 0.5850 | 23.66 | 7.347 | 0.0000 | 0.0000 | 16.8405 |
| E9PNM5 | -0.5849 | 27.86 | -9.333 | 0.0000 | 0.0000 | 28.9934 |
| RASL1 | -0.5812 | 27.57 | -6.992 | 0.0000 | 0.0000 | 14.8263 |
| A0A087WWT2 | -0.5790 | 26.11 | -7.829 | 0.0000 | 0.0000 | 19.6531 |
| H7C0M5 | 0.5778 | 26.23 | 3.613 | 0.0004 | 0.0012 | -0.6167 |
| J3KRM4 | 0.5775 | 33.04 | 10.254 | 0.0000 | 0.0000 | 35.0244 |
| SNTB1 | 0.5767 | 26.55 | 6.847 | 0.0000 | 0.0000 | 14.0267 |
| H7C0X8 | 0.5734 | 28.31 | 8.122 | 0.0000 | 0.0000 | 21.4146 |
| S10A8 | 0.5671 | 23.77 | 5.853 | 0.0000 | 0.0000 | 8.8123 |
| R4GNC7 | 0.5595 | 27.05 | 5.104 | 0.0000 | 0.0000 | 5.2726 |
| K7ESQ2 | 0.5507 | 27.82 | 5.300 | 0.0000 | 0.0000 | 6.1609 |
| F6THM6 | -0.5495 | 31.60 | -6.907 | 0.0000 | 0.0000 | 14.3573 |
| F2Z2Z8 | 0.5449 | 29.65 | 10.911 | 0.0000 | 0.0000 | 39.4328 |
| B5MCB5 | 0.5446 | 27.33 | 3.724 | 0.0003 | 0.0009 | -0.2367 |
| C9JZU8 | -0.5322 | 26.00 | -5.958 | 0.0000 | 0.0000 | 9.3370 |
| PLCD1 | 0.5311 | 29.49 | 10.629 | 0.0000 | 0.0000 | 37.5318 |
| Q5TBN5 | 0.5214 | 26.03 | 3.982 | 0.0001 | 0.0004 | 0.6828 |
| H7BZ97 | 0.5164 | 24.96 | 7.442 | 0.0000 | 0.0000 | 17.3837 |
| C2C4C | -0.5138 | 26.59 | -7.233 | 0.0000 | 0.0000 | 16.1869 |
| R4GN98 | 0.5133 | 29.10 | 3.819 | 0.0002 | 0.0006 | 0.0956 |
| F8VRJ1 | -0.5093 | 31.25 | -8.136 | 0.0000 | 0.0000 | 21.4986 |
| HMGN2 | 0.5082 | 29.70 | 5.548 | 0.0000 | 0.0000 | 7.3282 |
| PRDX6 | 0.5049 | 33.36 | 10.205 | 0.0000 | 0.0000 | 34.6998 |
| HECAM | 0.5021 | 30.99 | 10.208 | 0.0000 | 0.0000 | 34.7244 |
| H0YFB9 | -0.5009 | 29.19 | -8.043 | 0.0000 | 0.0000 | 20.9380 |
| Q5T3J1 | 0.4997 | 31.31 | 4.834 | 0.0000 | 0.0000 | 4.0851 |
| C9J6E1 | 0.4965 | 24.32 | 7.553 | 0.0000 | 0.0000 | 18.0280 |
| K7ER39 | 0.4917 | 28.07 | 7.575 | 0.0000 | 0.0000 | 18.1586 |
| E7EUI6 | 0.4896 | 26.16 | 8.162 | 0.0000 | 0.0000 | 21.6571 |
| C9JHZ9 | -0.4895 | 28.79 | -5.711 | 0.0000 | 0.0000 | 8.1144 |
| BATF3 | -0.4894 | 27.10 | -7.188 | 0.0000 | 0.0000 | 15.9329 |
| D6RCN3 | 0.4885 | 32.07 | 7.964 | 0.0000 | 0.0000 | 20.4590 |
| A0A087X134 | 0.4881 | 32.85 | 10.838 | 0.0000 | 0.0000 | 38.9419 |
| H3BV46 | -0.4872 | 24.93 | -6.507 | 0.0000 | 0.0000 | 12.1811 |
| F2Z2Q2 | 0.4861 | 26.40 | 4.578 | 0.0000 | 0.0000 | 3.0053 |
| CYTB | 0.4796 | 27.86 | 7.152 | 0.0000 | 0.0000 | 15.7255 |
| H0Y933 | 0.4790 | 24.95 | 11.312 | 0.0000 | 0.0000 | 42.1605 |
| A6PVC3 | 0.4769 | 29.57 | 7.684 | 0.0000 | 0.0000 | 18.8003 |
| FABP7 | 0.4750 | 26.91 | 5.423 | 0.0000 | 0.0000 | 6.7362 |
| PLEC.1 | 0.4748 | 35.30 | 10.998 | 0.0000 | 0.0000 | 40.0289 |
| F5H1F2 | 0.4744 | 27.03 | 8.505 | 0.0000 | 0.0000 | 23.7568 |
| H3BP73 | 0.4734 | 27.71 | 6.335 | 0.0000 | 0.0000 | 11.2742 |
| H7C1X2 | 0.4732 | 25.32 | 4.183 | 0.0000 | 0.0002 | 1.4359 |
| E9PJ32 | 0.4721 | 28.42 | 3.353 | 0.0010 | 0.0028 | -1.4639 |
| HPCA | -0.4704 | 28.92 | -9.256 | 0.0000 | 0.0000 | 28.5001 |
| GPC5B | 0.4694 | 27.45 | 4.531 | 0.0000 | 0.0001 | 2.8128 |
| HNMT | 0.4688 | 26.74 | 5.216 | 0.0000 | 0.0000 | 5.7771 |
| H7C2Q0 | 0.4678 | 26.52 | 7.380 | 0.0000 | 0.0000 | 17.0282 |
| GBG12 | 0.4668 | 29.67 | 9.080 | 0.0000 | 0.0000 | 27.3713 |
| J3KT01 | 0.4640 | 31.12 | 3.871 | 0.0001 | 0.0005 | 0.2801 |
| CYBR1 | 0.4640 | 26.78 | 6.111 | 0.0000 | 0.0000 | 10.1135 |
| J3QKL8 | -0.4617 | 27.01 | -5.813 | 0.0000 | 0.0000 | 8.6142 |
| NFH | 0.4582 | 33.03 | 5.304 | 0.0000 | 0.0000 | 6.1834 |
| E9PKC9 | 0.4567 | 25.90 | 6.775 | 0.0000 | 0.0000 | 13.6287 |
| S10AB | 0.4541 | 27.99 | 3.914 | 0.0001 | 0.0005 | 0.4356 |
| RTN4 | -0.4531 | 26.83 | -9.029 | 0.0000 | 0.0000 | 27.0486 |
| SPTN1 | 0.4522 | 27.80 | 7.539 | 0.0000 | 0.0000 | 17.9465 |
| E9PGA7 | 0.4518 | 32.54 | 9.233 | 0.0000 | 0.0000 | 28.3498 |
| Q5T8Q9 | 0.4492 | 27.55 | 2.890 | 0.0043 | 0.0103 | -2.8345 |
| DLGP2 | -0.4443 | 25.64 | -4.918 | 0.0000 | 0.0000 | 4.4478 |
| E7ERP7 | 0.4363 | 30.76 | 6.357 | 0.0000 | 0.0000 | 11.3905 |
| B8ZZJ2 | 0.4356 | 27.05 | 2.981 | 0.0032 | 0.0081 | -2.5801 |
| F8WCH0 | 0.4343 | 33.37 | 3.971 | 0.0001 | 0.0004 | 0.6429 |
| A0A087WX08 | 0.4333 | 26.05 | 8.210 | 0.0000 | 0.0000 | 21.9511 |
| K7ELP4 | 0.4330 | 25.27 | 6.358 | 0.0000 | 0.0000 | 11.3920 |
| B0QY64 | 0.4320 | 24.98 | 4.371 | 0.0000 | 0.0001 | 2.1698 |
| D6RAU9 | -0.4305 | 26.56 | -6.603 | 0.0000 | 0.0000 | 12.6960 |
| MARF1 | -0.4301 | 25.32 | -9.212 | 0.0000 | 0.0000 | 28.2138 |
| D6R9P2 | 0.4254 | 28.64 | 7.898 | 0.0000 | 0.0000 | 20.0643 |
| ANXA4 | 0.4244 | 28.10 | 9.250 | 0.0000 | 0.0000 | 28.4587 |
| C9J1K7 | 0.4207 | 26.64 | 5.990 | 0.0000 | 0.0000 | 9.5002 |
| GHC2 | 0.4170 | 27.14 | 6.587 | 0.0000 | 0.0000 | 12.6103 |
| H0Y4F5 | 0.4161 | 30.98 | 5.827 | 0.0000 | 0.0000 | 8.6876 |
| TAU.3 | 0.4160 | 26.73 | 7.409 | 0.0000 | 0.0000 | 17.1950 |
| K7ELJ5 | -0.4147 | 25.64 | -7.792 | 0.0000 | 0.0000 | 19.4356 |
| D6RAF1 | 0.4123 | 24.56 | 5.348 | 0.0000 | 0.0000 | 6.3844 |
| H0YFX9 | 0.4110 | 30.41 | 7.944 | 0.0000 | 0.0000 | 20.3411 |
| ACBG2 | 0.4105 | 26.51 | 5.597 | 0.0000 | 0.0000 | 7.5633 |
| A8MRB1 | 0.4104 | 32.73 | 6.924 | 0.0000 | 0.0000 | 14.4499 |
| H3BQC7 | -0.4098 | 29.51 | -5.987 | 0.0000 | 0.0000 | 9.4856 |
| NPTXR | -0.4079 | 28.76 | -5.045 | 0.0000 | 0.0000 | 5.0047 |
| PDLI5 | 0.4074 | 27.85 | 7.997 | 0.0000 | 0.0000 | 20.6604 |
| CATZ | 0.4073 | 25.34 | 7.148 | 0.0000 | 0.0000 | 15.7039 |
| K7EP08 | 0.4072 | 26.26 | 5.260 | 0.0000 | 0.0000 | 5.9781 |
| AL1L2 | 0.4054 | 31.75 | 9.570 | 0.0000 | 0.0000 | 30.5283 |
| H7C5L4 | 0.4045 | 26.19 | 5.624 | 0.0000 | 0.0000 | 7.6922 |
| A0A0C4DGC5 | 0.4043 | 31.34 | 8.518 | 0.0000 | 0.0000 | 23.8414 |
| CSPG2 | 0.4025 | 33.30 | 7.218 | 0.0000 | 0.0000 | 16.1037 |
| ASAH1 | 0.4018 | 28.57 | 6.830 | 0.0000 | 0.0000 | 13.9341 |
| MERL | 0.4017 | 32.14 | 9.287 | 0.0000 | 0.0000 | 28.6966 |
| C9J7C7 | 0.3995 | 27.51 | 8.019 | 0.0000 | 0.0000 | 20.7914 |
| MAP4 | 0.3993 | 30.77 | 9.791 | 0.0000 | 0.0000 | 31.9677 |
| C9J7D8 | 0.3990 | 26.24 | 6.509 | 0.0000 | 0.0000 | 12.1926 |
| A0A0C4DFY8 | -0.3971 | 27.36 | -3.759 | 0.0002 | 0.0008 | -0.1144 |
| A0A0C4DGZ9 | 0.3966 | 31.03 | 9.404 | 0.0000 | 0.0000 | 29.4498 |
| Q5SZU1 | 0.3962 | 31.53 | 8.229 | 0.0000 | 0.0000 | 22.0640 |
| U3KQQ1 | 0.3936 | 33.86 | 7.686 | 0.0000 | 0.0000 | 18.8097 |
| ACTN2 | -0.3925 | 26.14 | -7.831 | 0.0000 | 0.0000 | 19.6681 |
| B4DUT8 | 0.3911 | 28.46 | 6.595 | 0.0000 | 0.0000 | 12.6560 |
| A0A087WWY6 | 0.3896 | 27.43 | 5.795 | 0.0000 | 0.0000 | 8.5287 |
| G3V3R6 | 0.3893 | 26.66 | 6.425 | 0.0000 | 0.0000 | 11.7472 |
| H0YK70 | 0.3891 | 25.81 | 4.150 | 0.0000 | 0.0002 | 1.3099 |
| F5H617 | -0.3888 | 29.15 | -5.909 | 0.0000 | 0.0000 | 9.0916 |
| E9PFT6 | -0.3878 | 31.34 | -4.428 | 0.0000 | 0.0001 | 2.3974 |
| D6RJA3 | -0.3876 | 25.57 | -6.038 | 0.0000 | 0.0000 | 9.7421 |
| H2A1D | 0.3853 | 32.90 | 6.513 | 0.0000 | 0.0000 | 12.2134 |
| D6RGJ1 | 0.3847 | 26.41 | 3.917 | 0.0001 | 0.0005 | 0.4474 |
| H0Y592 | 0.3836 | 26.16 | 5.716 | 0.0000 | 0.0000 | 8.1374 |
| H0Y972 | -0.3831 | 26.09 | -6.208 | 0.0000 | 0.0000 | 10.6121 |
| A0A087X1B9 | 0.3825 | 25.38 | 4.589 | 0.0000 | 0.0000 | 3.0526 |
| PTRF | 0.3817 | 28.48 | 6.976 | 0.0000 | 0.0000 | 14.7425 |
| F5GXS0 | 0.3813 | 25.97 | 4.437 | 0.0000 | 0.0001 | 2.4342 |
| C9IZA0 | 0.3812 | 26.84 | 6.740 | 0.0000 | 0.0000 | 13.4394 |
| C9J8K5 | 0.3806 | 26.70 | 7.699 | 0.0000 | 0.0000 | 18.8893 |
| Q5RI18 | 0.3796 | 25.43 | 6.292 | 0.0000 | 0.0000 | 11.0509 |
| ACBD7 | 0.3795 | 29.79 | 6.806 | 0.0000 | 0.0000 | 13.8005 |
| A0A096LPE1 | 0.3794 | 29.79 | 8.616 | 0.0000 | 0.0000 | 24.4499 |
| F8WE61 | 0.3790 | 28.00 | 5.274 | 0.0000 | 0.0000 | 6.0438 |
| H2BFS | 0.3789 | 30.18 | 5.306 | 0.0000 | 0.0000 | 6.1913 |
| DOCK3 | -0.3785 | 26.78 | -6.364 | 0.0000 | 0.0000 | 11.4244 |
| E5RIX3 | -0.3779 | 29.88 | -6.491 | 0.0000 | 0.0000 | 12.0954 |
| PARVA | 0.3775 | 26.81 | 7.987 | 0.0000 | 0.0000 | 20.5974 |
| H0Y7N1 | 0.3766 | 28.87 | 9.623 | 0.0000 | 0.0000 | 30.8693 |
| C1QC | 0.3764 | 26.36 | 3.886 | 0.0001 | 0.0005 | 0.3361 |
| RGS14 | -0.3746 | 24.91 | -6.254 | 0.0000 | 0.0000 | 10.8500 |
| J3QQX8 | 0.3725 | 26.19 | 5.218 | 0.0000 | 0.0000 | 5.7871 |
| TPD53 | 0.3719 | 26.85 | 6.255 | 0.0000 | 0.0000 | 10.8546 |
| D6RC12 | -0.3699 | 26.10 | -5.905 | 0.0000 | 0.0000 | 9.0719 |
| PLXB1 | 0.3698 | 27.17 | 5.015 | 0.0000 | 0.0000 | 4.8724 |
| D6R9P1 | 0.3696 | 27.76 | 3.082 | 0.0023 | 0.0061 | -2.2890 |
| CES1P | 0.3690 | 33.86 | 3.268 | 0.0013 | 0.0036 | -1.7307 |
| ANO1 | -0.3684 | 28.94 | -5.801 | 0.0000 | 0.0000 | 8.5558 |
| H0YAT3 | 0.3681 | 24.79 | 5.890 | 0.0000 | 0.0000 | 8.9968 |
| FUBP3 | 0.3673 | 24.66 | 7.768 | 0.0000 | 0.0000 | 19.2921 |
| E5RFY9 | 0.3672 | 26.13 | 6.100 | 0.0000 | 0.0000 | 10.0560 |
| PIR | 0.3669 | 25.55 | 4.479 | 0.0000 | 0.0001 | 2.6004 |
| LAMB4 | 0.3662 | 27.81 | 5.530 | 0.0000 | 0.0000 | 7.2426 |
| Q5JT49 | 0.3647 | 27.14 | 4.865 | 0.0000 | 0.0000 | 4.2194 |
| H7C457 | 0.3630 | 25.14 | 5.429 | 0.0000 | 0.0000 | 6.7627 |
| M0QZP8 | -0.3624 | 27.60 | -8.128 | 0.0000 | 0.0000 | 21.4517 |
| PLXA2 | -0.3616 | 25.44 | -4.117 | 0.0001 | 0.0002 | 1.1849 |
| D6RFY4 | -0.3608 | 27.46 | -6.983 | 0.0000 | 0.0000 | 14.7783 |
| LY6H | -0.3599 | 30.85 | -5.862 | 0.0000 | 0.0000 | 8.8581 |
| MACD1 | 0.3597 | 27.82 | 7.479 | 0.0000 | 0.0000 | 17.6023 |
| D6REM0 | -0.3588 | 27.01 | -5.448 | 0.0000 | 0.0000 | 6.8543 |
| NDUV3.1 | -0.3585 | 26.80 | -3.590 | 0.0004 | 0.0013 | -0.6934 |
| TBB2B | 0.3576 | 28.44 | 3.204 | 0.0016 | 0.0043 | -1.9264 |
| A6PVK2 | -0.3572 | 24.55 | -6.813 | 0.0000 | 0.0000 | 13.8406 |
| E9PBF6 | 0.3563 | 27.65 | 3.593 | 0.0004 | 0.0013 | -0.6845 |
| H0Y7G2 | -0.3550 | 29.46 | -7.516 | 0.0000 | 0.0000 | 17.8121 |
| SCN4B | 0.3546 | 25.04 | 4.783 | 0.0000 | 0.0000 | 3.8643 |
| E5RG36CON__P17697 | 0.3546 | 31.65 | 7.582 | 0.0000 | 0.0000 | 18.2008 |
| SHAN2 | -0.3543 | 26.86 | -5.463 | 0.0000 | 0.0000 | 6.9242 |
| C9JUP6 | 0.3541 | 25.69 | 4.373 | 0.0000 | 0.0001 | 2.1776 |
| VAMP1 | 0.3536 | 29.14 | 6.138 | 0.0000 | 0.0000 | 10.2546 |
| I3L0T3 | 0.3527 | 27.55 | 3.553 | 0.0005 | 0.0015 | -0.8168 |
| E9PQS8 | -0.3526 | 26.55 | -7.308 | 0.0000 | 0.0000 | 16.6143 |
| H0YIZ1 | 0.3522 | 25.96 | 5.104 | 0.0000 | 0.0000 | 5.2714 |
| RBM3 | 0.3500 | 25.14 | 3.832 | 0.0002 | 0.0006 | 0.1436 |
| ARHGP | -0.3490 | 25.97 | -4.081 | 0.0001 | 0.0003 | 1.0502 |
| TPPP3 | 0.3478 | 30.63 | 6.098 | 0.0000 | 0.0000 | 10.0494 |
| A0A087WW77 | 0.3473 | 29.04 | 8.957 | 0.0000 | 0.0000 | 26.5909 |
| VA0D2 | -0.3456 | 28.44 | -4.859 | 0.0000 | 0.0000 | 4.1936 |
| 1B37 | 0.3452 | 25.54 | 4.327 | 0.0000 | 0.0001 | 1.9935 |
| C9JWY7 | 0.3445 | 29.29 | 9.140 | 0.0000 | 0.0000 | 27.7582 |
| X6R242 | 0.3431 | 27.18 | 3.323 | 0.0011 | 0.0030 | -1.5592 |
| ADDG | 0.3429 | 31.65 | 11.410 | 0.0000 | 0.0000 | 42.8286 |
| H12 | 0.3428 | 27.62 | 3.746 | 0.0002 | 0.0008 | -0.1591 |
| CAV3 | 0.3425 | 26.71 | 4.920 | 0.0000 | 0.0000 | 4.4580 |
| MRP | 0.3388 | 28.53 | 7.517 | 0.0000 | 0.0000 | 17.8192 |
| CKLF5 | 0.3387 | 26.50 | 4.356 | 0.0000 | 0.0001 | 2.1104 |
| H3BSP9 | 0.3380 | 31.04 | 5.224 | 0.0000 | 0.0000 | 5.8157 |
| H0YFB7 | 0.3376 | 26.90 | 5.498 | 0.0000 | 0.0000 | 7.0897 |
| CLIC5 | 0.3370 | 29.44 | 5.124 | 0.0000 | 0.0000 | 5.3619 |
| SERC | 0.3360 | 31.04 | 8.639 | 0.0000 | 0.0000 | 24.5936 |
| C9JDT0 | -0.3359 | 30.46 | -7.025 | 0.0000 | 0.0000 | 15.0133 |
| GBG5 | 0.3357 | 29.22 | 4.994 | 0.0000 | 0.0000 | 4.7811 |
| H3BMD4 | -0.3329 | 25.61 | -4.842 | 0.0000 | 0.0000 | 4.1176 |
| E9PNP4 | 0.3328 | 31.82 | 7.710 | 0.0000 | 0.0000 | 18.9499 |
| E5RHA9 | -0.3315 | 28.16 | -7.380 | 0.0000 | 0.0000 | 17.0277 |
| F8VU44 | -0.3308 | 25.84 | -5.916 | 0.0000 | 0.0000 | 9.1293 |
| F8VXJ5 | 0.3303 | 28.74 | 2.730 | 0.0069 | 0.0156 | -3.2649 |
| DLGP3 | -0.3296 | 27.60 | -5.889 | 0.0000 | 0.0000 | 8.9944 |
| B5MC71 | 0.3291 | 26.82 | 3.815 | 0.0002 | 0.0006 | 0.0822 |
| Q5T0I0 | 0.3286 | 33.42 | 7.506 | 0.0000 | 0.0000 | 17.7562 |
| RTN1 | -0.3283 | 29.60 | -7.909 | 0.0000 | 0.0000 | 20.1336 |
| HUG1 | 0.3281 | 28.59 | 8.043 | 0.0000 | 0.0000 | 20.9391 |
| H7C144 | -0.3279 | 26.84 | -7.452 | 0.0000 | 0.0000 | 17.4417 |
| J3KS57 | 0.3271 | 26.03 | 4.951 | 0.0000 | 0.0000 | 4.5906 |
| H0Y3R8 | 0.3269 | 28.55 | 7.546 | 0.0000 | 0.0000 | 17.9879 |
| SYNPO | -0.3268 | 30.88 | -3.829 | 0.0002 | 0.0006 | 0.1317 |
| VAMP3 | 0.3268 | 25.34 | 6.671 | 0.0000 | 0.0000 | 13.0651 |
| H3BT58 | 0.3263 | 30.10 | 8.707 | 0.0000 | 0.0000 | 25.0150 |
| H0Y921 | 0.3252 | 28.36 | 7.668 | 0.0000 | 0.0000 | 18.7063 |
| E9PS65 | 0.3250 | 26.09 | 3.075 | 0.0024 | 0.0062 | -2.3085 |
| A2A3C4 | -0.3245 | 24.65 | -5.750 | 0.0000 | 0.0000 | 8.3037 |
| GBG4 | -0.3243 | 27.05 | -4.793 | 0.0000 | 0.0000 | 3.9090 |
| S4R3U1 | 0.3239 | 26.14 | 5.248 | 0.0000 | 0.0000 | 5.9257 |
| H7C1E7 | 0.3221 | 29.92 | 3.119 | 0.0021 | 0.0055 | -2.1792 |
| TRI25 | 0.3221 | 24.71 | 3.740 | 0.0002 | 0.0008 | -0.1795 |
| E9PJM7 | 0.3219 | 26.10 | 4.803 | 0.0000 | 0.0000 | 3.9494 |
| B1AQP8 | 0.3207 | 29.11 | 6.773 | 0.0000 | 0.0000 | 13.6174 |
| J3KRN1 | 0.3196 | 27.23 | 6.641 | 0.0000 | 0.0000 | 12.9015 |
| A0A087WUT1 | 0.3193 | 30.10 | 5.415 | 0.0000 | 0.0000 | 6.6981 |
| OPA3 | -0.3192 | 26.38 | -4.316 | 0.0000 | 0.0001 | 1.9531 |
| GSTT1 | 0.3184 | 27.87 | 2.396 | 0.0175 | 0.0350 | -4.0888 |
| H0YLF3 | 0.3178 | 29.08 | 4.408 | 0.0000 | 0.0001 | 2.3178 |
| H1T | 0.3172 | 31.84 | 5.554 | 0.0000 | 0.0000 | 7.3562 |
| H3BPG5 | 0.3170 | 24.99 | 5.389 | 0.0000 | 0.0000 | 6.5773 |
| CTGE8 | -0.3163 | 27.28 | -6.221 | 0.0000 | 0.0000 | 10.6824 |
| K7ENJ7 | 0.3160 | 28.89 | 8.133 | 0.0000 | 0.0000 | 21.4773 |
| EHD2 | 0.3158 | 26.90 | 4.442 | 0.0000 | 0.0001 | 2.4529 |
| H3BRQ4 | 0.3143 | 26.82 | 3.809 | 0.0002 | 0.0007 | 0.0610 |
| C1QL2 | -0.3136 | 26.53 | -6.512 | 0.0000 | 0.0000 | 12.2081 |
| GRIK2 | -0.3124 | 27.08 | -4.819 | 0.0000 | 0.0000 | 4.0200 |
| I3L1K1 | -0.3124 | 25.74 | -5.096 | 0.0000 | 0.0000 | 5.2331 |
| H7C5B5 | -0.3119 | 24.89 | -4.730 | 0.0000 | 0.0000 | 3.6406 |
| AOFB | 0.3111 | 32.40 | 6.271 | 0.0000 | 0.0000 | 10.9414 |
| A0A087WW91 | -0.3110 | 25.53 | -2.694 | 0.0077 | 0.0171 | -3.3581 |
| TLN1 | 0.3084 | 29.94 | 8.015 | 0.0000 | 0.0000 | 20.7661 |
| H3BQZ3 | 0.3084 | 30.16 | 8.184 | 0.0000 | 0.0000 | 21.7917 |
| J3QSF4 | -0.3077 | 26.28 | -6.427 | 0.0000 | 0.0000 | 11.7569 |
| ADCK3 | -0.3068 | 26.75 | -4.775 | 0.0000 | 0.0000 | 3.8320 |
| CAPZB | 0.3067 | 26.92 | 5.750 | 0.0000 | 0.0000 | 8.3043 |
| SNX32 | -0.3067 | 23.79 | -4.210 | 0.0000 | 0.0002 | 1.5394 |
| J3QRJ3 | -0.3065 | 33.29 | -7.885 | 0.0000 | 0.0000 | 19.9883 |
| TSPOA | 0.3051 | 25.97 | 6.795 | 0.0000 | 0.0000 | 13.7385 |
| 1A02 | 0.3051 | 27.12 | 3.267 | 0.0013 | 0.0036 | -1.7333 |
| E9PJJ8 | 0.3043 | 27.59 | 6.115 | 0.0000 | 0.0000 | 10.1371 |
| ADXL | -0.3041 | 30.01 | -7.205 | 0.0000 | 0.0000 | 16.0268 |
| H3BNK3 | -0.3035 | 28.22 | -7.938 | 0.0000 | 0.0000 | 20.3055 |
| H90B4 | -0.3019 | 29.25 | -5.312 | 0.0000 | 0.0000 | 6.2200 |
| SAMH1 | 0.3012 | 26.71 | 4.087 | 0.0001 | 0.0003 | 1.0748 |
| F8W8D9 | -0.3009 | 27.84 | -4.485 | 0.0000 | 0.0001 | 2.6276 |
| E5RG14 | -0.3005 | 29.08 | -4.291 | 0.0000 | 0.0001 | 1.8554 |
| LRC73 | -0.3003 | 25.27 | -5.100 | 0.0000 | 0.0000 | 5.2510 |
| Q6IPE9 | -0.3002 | 27.61 | -5.657 | 0.0000 | 0.0000 | 7.8541 |
dat_fc_pv <- bind_cols(ProteinIDs = rownames(data_imp),
fc_control_ad = topTable(bf_AvsC, sort = "none", n = Inf)$logFC,
fc_control_psp = topTable(bf_PvsC, sort = "none", n = Inf)$logFC,
fc_ad_psp = topTable(bf_AvsP, sort = "none", n = Inf)$logFC,
ad_control_pv = topTable(bf_AvsC, sort = "none", n = Inf)$adj.P.Val,
psp_control_pv = topTable(bf_PvsC, sort = "none", n = Inf)$adj.P.Val,
ad_psp_pv = topTable(bf_AvsP, sort = "none", n = Inf)$adj.P.Val,
PIDshort = rownames(data_norm))
The results are collected in an Excel file, the name of which includes the criteria (adjusted p-values and log2FC). If an Excel file with the same name and page exists, then the data is not overwritten and an error message appears. Therefore, before running the code again with the same parameters (the same adjusted p-values+FC), you need to delete this file.
AD_C_UP <- dat_fc_pv %>%
filter(ad_control_pv <= pv_cut) %>%
filter(fc_control_ad >= fc_cut) %>%
arrange(-abs(fc_control_ad)) %>%
dplyr::select(PIDshort, ad_control_pv, fc_control_ad)
AD_C_DOWN <- dat_fc_pv %>%
filter(ad_control_pv <= pv_cut) %>%
filter(fc_control_ad <= -fc_cut) %>%
arrange(-abs(fc_control_ad)) %>%
dplyr::select(PIDshort, ad_control_pv, fc_control_ad)
PSP_C_UP <- dat_fc_pv %>%
filter(psp_control_pv <= pv_cut) %>%
filter(fc_control_psp >= fc_cut) %>%
arrange(-abs(fc_control_psp)) %>%
dplyr::select(PIDshort, psp_control_pv, fc_control_psp)
PSP_C_DOWN <- dat_fc_pv %>%
filter(psp_control_pv <= pv_cut) %>%
filter(fc_control_psp <= -fc_cut) %>%
arrange(-abs(fc_control_psp)) %>%
dplyr::select(PIDshort, psp_control_pv, fc_control_psp)
AD_PSP_UP <- dat_fc_pv %>%
filter(ad_psp_pv <= pv_cut) %>%
filter(fc_ad_psp >= fc_cut) %>%
arrange(-abs(fc_ad_psp)) %>%
dplyr::select(PIDshort, ad_psp_pv, fc_ad_psp)
AD_PSP_DOWN <- dat_fc_pv %>%
filter(ad_psp_pv <= pv_cut) %>%
filter(fc_ad_psp <= -fc_cut) %>%
arrange(-abs(fc_ad_psp)) %>%
dplyr::select(PIDshort, ad_psp_pv, fc_ad_psp)
require(xlsx)
write.xlsx(AD_C_UP,
file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
append = TRUE,
sheetName = "AD vs Control UP")
write.xlsx(AD_C_DOWN,
file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
append = TRUE,
sheetName = "AD vs Control DOWN")
write.xlsx(PSP_C_UP,
file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
append = TRUE,
sheetName = "PSP vs Control UP")
write.xlsx(PSP_C_DOWN,
file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
append = TRUE,
sheetName = "PSP vs Control DOWN")
write.xlsx(AD_PSP_UP,
file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
append = TRUE,
sheetName = "AD vs PSP UP")
write.xlsx(AD_PSP_DOWN,
file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
append = TRUE,
sheetName = "AD vs PSP DOWN")
Prepare an object for clustering
data_log2 <- data_norm %>% log2
d <- data_log2
## Set clear column names
colnames(d) <- c(rep("Control", 31), rep("AD", 84), rep("PSP", 85)) %>% make.unique
## add columns with short protein IDs, log2 folds, adjusted p-values
data_all <- data.frame(d, dat_fc_pv)
rm(d)
Filter the data for AD vs Control according to a threshold of significance: adjusted p-values 0.05 and log2 fold change to 0.3.
dat_filt_ad <- data_all %>% filter(abs(fc_control_ad) >= fc_cut &
ad_control_pv <= pv_cut) %>%
arrange(-abs(fc_control_ad)) %>%
## slice_head(n=25) %>%
dplyr::select(PIDshort, (starts_with("AD.") | starts_with("Control")) )
Transform to matrix
## data.matrix() leaves numeric data as is.
ad_matrix <- data.matrix(dat_filt_ad)
rownames(ad_matrix) <- dat_filt_ad$PIDshort
ad_matrix <- ad_matrix[,-1]
ad_scaled <- ad_matrix %>% t %>% scale %>% t
Euclidean distance between experiments: AD & Control
ad_dist <- ad_scaled %>% t %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Euclidean distance between proteins: AD & Control
ad_dist_prot <- ad_scaled %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Hierarchical Ward’s clustering
ad_hclust <- hclust(ad_dist, method = "ward.D2", members = NULL)
ad_hclust_prot <- hclust(ad_dist_prot, method = "ward.D2", members = NULL)
png("./figs_prot/hclust_ad.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
ad_hclust %>% plot(cex = 0.4, main = "Conditions (Number of samples: AD = 84, Control = 31)")
ad_hclust_prot %>% plot(cex = 0.6, main = "Proteins (Number of samples: AD = 84, Control = 31)")
dev.off()
## png
## 2
# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)
png("./figs_prot/heatmap_ad.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
ad_scaled %>%
# Plot heatmap
gplots::heatmap.2(., # Tidy, normalised data
Colv=as.dendrogram(ad_hclust), # Experiments clusters in cols
Rowv=as.dendrogram(ad_hclust_prot), # Protein clusters in rows
revC=TRUE, # Flip plot to match pheatmap
density.info="histogram", # Plot histogram of data and colour key
trace="none", # Turn of trace lines from heat map
col = my_palette, # Use my colour scheme
cexRow=0.3,cexCol=0.2, # Amend row and column label fonts
xlab = paste0("Number of samples: AD = ", length(ad_cols), ", Control = ", length(control_cols))
)
dev.off()
## png
## 2
Filter the data for PSP vs Control according to a threshold of significance: adjusted p-values 0.05 and log2 fold change to 0.3.
dat_filt_psp <- data_all %>% filter(abs(fc_control_psp) >= fc_cut &
psp_control_pv <= pv_cut) %>%
arrange(-abs(fc_control_psp)) %>%
## slice_head(n=25) %>%
dplyr::select(PIDshort, (starts_with("PSP.") | starts_with("Control")) )
Transform to matrix
## data.matrix() leaves numeric data as is.
psp_matrix <- data.matrix(dat_filt_psp)
rownames(psp_matrix) <- dat_filt_psp$PIDshort
psp_matrix <- psp_matrix[,-1]
psp_scaled <- psp_matrix %>% t %>% scale %>% t
Euclidean distance between experiments: AD & Control
psp_dist <- psp_scaled %>% t %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Euclidean distance between proteins: AD & Control
psp_dist_prot <- psp_scaled %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Hierarchical Ward’s clustering
psp_hclust <- hclust(psp_dist, method = "ward.D2", members = NULL)
psp_hclust_prot <- hclust(psp_dist_prot, method = "ward.D2", members = NULL)
png("./figs_prot/hclust_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
psp_hclust %>% plot(cex = 0.4, main = "Conditions (Number of samples: PSP = 85, Control = 31)")
psp_hclust_prot %>% plot(cex = 0.6, main = "Proteins (Number of samples: PSP = 85, Control = 31)")
dev.off()
## png
## 2
# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)
# Plot heatmap with heatmap.2
png("./figs_prot/heatmap_psp.png", width = 2000, height = 2000, res = 300)
par(cex.main=0.75) # Shrink title fonts on plot
psp_scaled %>%
# Plot heatmap
gplots::heatmap.2(., # Tidy, normalised data
Colv=as.dendrogram(psp_hclust), # Experiments clusters in cols
Rowv=as.dendrogram(psp_hclust_prot), # Protein clusters in rows
revC=TRUE, # Flip plot to match pheatmap
density.info="histogram", # Plot histogram of data and colour key
trace="none", # Turn of trace lines from heat map
col = my_palette, # Use my colour scheme
cexRow=0.3, cexCol=0.3, # Amend row and column label fonts
xlab = paste0("Number of samples: PSP = ", length(psp_cols), ", Control = ", length(control_cols))
)
dev.off()
## png
## 2
Filter the data for AD vs PSP according to a threshold of significance: adjusted p-values 0.05 and log2 fold change to 0.3.
dat_filt_adpsp <- data_all %>% filter(abs(fc_ad_psp) >= fc_cut &
ad_psp_pv <= pv_cut) %>%
arrange(-abs(fc_ad_psp)) %>%
## slice_head(n=25) %>%
dplyr::select(PIDshort, (starts_with("AD.") | starts_with("PSP.")) )
Transform to matrix
## data.matrix() leaves numeric data as is.
adpsp_matrix <- data.matrix(dat_filt_adpsp)
rownames(adpsp_matrix) <- dat_filt_adpsp$PIDshort
adpsp_matrix <- adpsp_matrix[,-1]
adpsp_scaled <- adpsp_matrix %>% t %>% scale %>% t
Euclidean distance between experiments: AD & Control
adpsp_dist <- adpsp_scaled %>% t %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Euclidean distance between proteins: AD & Control
adpsp_dist_prot <- adpsp_scaled %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Hierarchical Ward’s clustering
adpsp_hclust <- hclust(adpsp_dist, method = "ward.D2", members = NULL)
adpsp_hclust_prot <- hclust(adpsp_dist_prot, method = "ward.D2", members = NULL)
png("./figs_prot/hclust_ad_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
adpsp_hclust %>% plot(cex = 0.2, main = "Conditions (Number of samples: AD = 84, PSP = 85)")
adpsp_hclust_prot %>% plot(cex = 0.2, main = "Proteins (Number of samples: AD = 84, PSP = 85)")
dev.off()
## png
## 2
# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)
# Plot heatmap with heatmap.2
png("./figs_prot/heatmap_ad_psp.png", width = 2000, height = 2000, res = 300)
par(cex.main=0.75) # Shrink title fonts on plot
adpsp_scaled %>%
# Plot heatmap
gplots::heatmap.2(., # Tidy, normalised data
Colv=as.dendrogram(adpsp_hclust), # Experiments clusters in cols
Rowv=as.dendrogram(adpsp_hclust_prot), # Protein clusters in rows
revC=TRUE, # Flip plot to match pheatmap
density.info="histogram", # Plot histogram of data and colour key
trace="none", # Turn of trace lines from heat map
col = my_palette, # Use my colour scheme
cexRow=0.3, cexCol=0.3, # Amend row and column label fonts
xlab = paste0("Number of samples: AD = ", length(ad_cols), ", PSP = ", length(psp_cols))
)
dev.off()
## png
## 2
venn_list <- list("AD vs Control" = dat_fc_pv %>%
filter(ad_control_pv <= pv_cut) %>%
## filter(ad_control_pv <= pv_cut & abs(fc_control_ad) > fc_cut) %>%
pull(PIDshort),
"PSP vs Control" = dat_fc_pv %>%
filter(psp_control_pv <= pv_cut) %>%
## filter(psp_control_pv <= pv_cut & abs(fc_control_psp) > fc_cut) %>%
pull(PIDshort),
"AD vs PSP" = dat_fc_pv %>%
filter(ad_psp_pv <= pv_cut) %>%
## filter(ad_psp_pv <= pv_cut & abs(fc_ad_psp) > fc_cut) %>%
pull(PIDshort)
)
# Prevent the output of a log file
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
## NULL
# Create a venn diagram object
prot_venn <- venn.diagram(venn_list,
NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 0.8,
fontfamily = "sans",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 0.8,
cat.fontfamily = "sans",
margin = 0.2,
main = paste0("Pairwise significantly DE proteins (adj.p<",pv_cut,") identified in the experiment"),
main.pos = c(0.5, 1.05),
main.fontfamily = "sans",
sub = "Number of samples: AD = 84, PSP = 85, Control = 31",
sub.pos = c(0.5, 0.92),
sub.cex = 0.8,
sub.fontfamily = "sans",
print.mode = c("raw","percent"), # Show both numbers and percent
)
# Plot the venn diagram using the gridExtra package
png("./figs_prot/venn.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = prot_venn))
dev.off()
## png
## 2
venn_list2 <- list(
"AD-C +" = dat_fc_pv %>%
filter(ad_control_pv <= pv_cut & fc_control_ad >= 0) %>%
pull(PIDshort),
"AD-C -" = dat_fc_pv %>%
filter(ad_control_pv <= pv_cut & fc_control_ad < 0) %>%
pull(PIDshort),
"PSP-C +" = dat_fc_pv %>%
filter(psp_control_pv <= pv_cut & fc_control_psp >= 0) %>%
pull(PIDshort),
"PSP-C -" = dat_fc_pv %>%
filter(psp_control_pv <= pv_cut & fc_control_psp < 0) %>%
pull(PIDshort),
"AD-PSP +" = dat_fc_pv %>%
filter(ad_psp_pv <= pv_cut & fc_ad_psp >= 0) %>%
pull(PIDshort),
"AD-PSP -" = dat_fc_pv %>%
filter(ad_psp_pv <= pv_cut & fc_ad_psp < 0) %>%
pull(PIDshort)
)
prot_venn_up <- venn.diagram(venn_list2[c(1,3,5)],NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 0.8,
fontfamily = "sans",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 0.8,
cat.fontfamily = "sans",
margin = 0.2,
main = "Pairwise significantly upregulated proteins (adj.p<0.05) identified in the experiment",
main.fontfamily = "sans",
sub = "Number of samples: AD = 84, PSP = 85, Control = 31",
sub.pos = c(0.5, 0.92),
sub.cex = 0.8,
sub.fontfamily = "sans",
print.mode = c("raw","percent"), # Show both numbers and percent
main.pos = c(0.5, 1.05)
)
# Plot the venn diagram using the gridExtra package
png("./figs_prot/venn_up.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = prot_venn_up))
dev.off()
## png
## 2
prot_venn_down <- venn.diagram(venn_list2[c(2,4,6)],NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 0.8,
fontfamily = "sans",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 0.8,
cat.fontfamily = "sans",
margin = 0.2,
main = "Pairwise significantly downregulated proteins (adj.p<0.05) identified in the experiment",
main.pos = c(0.5, 1.05),
main.fontfamily = "sans",
sub = "Number of samples: AD = 84, PSP = 85, Control = 31",
sub.pos = c(0.5, 0.92),
sub.cex = 0.8,
sub.fontfamily = "sans",
print.mode = c("raw","percent") # Show both numbers and percent
)
# Plot the venn diagram using the gridExtra package
png("./figs_prot/venn_down.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = prot_venn_down))
dev.off()
## png
## 2
EnhancedVolcano(dat_fc_pv,
x = "fc_control_ad",
y = "ad_control_pv",
lab = dat_fc_pv$PIDshort,
title = 'AD versus Control',
subtitle = "Number of samples: AD = 84, PSP = 85, Control = 31",
pCutoff = pv_cut,
FCcutoff = fc_cut,
pointSize = 1.0,
labSize = 3.0) +
ggplot2::scale_y_continuous(limits = c(0, 7)) +
ggplot2::scale_x_continuous(limits = c(-1, 1))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
ggsave("./figs_prot/ad_vs_control_volcano.png")
## Saving 7 x 5 in image
ad_vs_control_proteins <- dat_fc_pv %>%
dplyr::select(PIDshort, fc = fc_control_ad, pv = ad_control_pv) %>%
arrange(-abs(fc)) %>%
filter(pv <= pv_cut & abs(fc) >= fc_cut)
require(xlsx)
write.xlsx(ad_vs_control_proteins,
file = paste0("./results_prot/significantly_de_proteins_pv_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
sheetName = "AD vs Control", append=TRUE)
EnhancedVolcano(dat_fc_pv,
x = "fc_control_psp",
y = "psp_control_pv",
lab = dat_fc_pv$PIDshort,
title = 'PSP versus Control',
subtitle = "Number of samples: AD = 84, PSP = 85, Control = 31",
pCutoff = pv_cut,
FCcutoff = fc_cut,
pointSize = 1.0,
labSize = 3.0) +
ggplot2::scale_y_continuous(limits = c(0, 6)) +
ggplot2::scale_x_continuous(limits = c(-1, 1))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
ggsave("./figs_prot/psp_vs_control_volcano.png")
## Saving 7 x 5 in image
psp_vs_control_proteins <- dat_fc_pv %>%
dplyr::select(PIDshort, fc = fc_control_psp, pv = psp_control_pv) %>%
arrange(-abs(fc)) %>%
filter(pv <= pv_cut & abs(fc) >= fc_cut)
require(xlsx)
write.xlsx(psp_vs_control_proteins,
file = paste0("./results_prot/significantly_de_proteins_pv_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
sheetName = "PSP vs Control", append=TRUE)
EnhancedVolcano(dat_fc_pv,
x = "fc_ad_psp",
y = "ad_psp_pv",
lab = dat_fc_pv$PIDshort,
title = 'PSP versus AD',
subtitle = "Number of samples: AD = 84, PSP = 85, Control = 31",
pCutoff = pv_cut,
FCcutoff = fc_cut,
pointSize = 1.0,
labSize = 3.0) +
ggplot2::scale_y_continuous(limits = c(0, 20)) +
ggplot2::scale_x_continuous(limits = c(-1, 1))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
ggsave("./figs_prot/psp_vs_ad_volcano.png")
## Saving 7 x 5 in image
psp_vs_ad_proteins <- dat_fc_pv %>%
dplyr::select(PIDshort, fc = fc_ad_psp, pv = ad_psp_pv) %>%
arrange(-abs(fc)) %>%
filter(pv <= pv_cut & abs(fc) >= fc_cut)
require(xlsx)
## write.xlsx(psp_vs_ad_proteins, "significantly_de_proteins.xlsx", sheetName = "PSP vs AD", append=TRUE)
write.xlsx(psp_vs_ad_proteins,
file = paste0("./results_prot/significantly_de_proteins_pv_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
sheetName = "PSP vs AD", append=TRUE)
## On the example of AD vs Control.
## At first i took the significantly DE genes for e.g. AD vs Control.
## Data with FC and p-values has already been collected in dat_fc_pv. I
## had to select data from this table only for AD vs Control, since it
## was necessary to filter out significant genes for this particular
## variant.
ad_vs_control_names <- dat_fc_pv %>%
## Filtered out relevant proteins. In this case, with p-vales <= pv_cut and abs(log2FC) >= fc_cut
filter(abs(fc_control_ad) >= fc_cut & ad_control_pv <= pv_cut) %>%
## Sorting the table
arrange(-abs(fc_control_ad)) %>%
slice_head(n=25) %>%
## The full form of protein names is needed to select them from the
## cra.log table. The short form of protein names is needed for the
## graph.
dplyr::select(ProteinIDs, PIDshort)
## Then, I filtered the required proteins from the cra.log dataframe,
## transposed the resulting table and changed its class from matrix to
## dataframe.
avc <- cra.log[ad_vs_control_names$ProteinIDs, ] %>% t %>% data.frame
## Renamed columns by protein names.
colnames(avc) <- ad_vs_control_names$PIDshort
## Created a "condition" column filled with NAs. It is needed to build boxplots.
avc$condition <- NA
## In the "condition" column, I replaced NAs with the variants names: Control, AD, PSP.
avc[control_cols, ]$condition <- "Control"
avc[ad_cols, ]$condition <- "AD"
avc[psp_cols, ]$condition <- "PSP"
avc %>% filter(condition != "PSP") %>%
## For ggplot, you need to convert the data from wide format to long format.
pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
## gglot uses the "condition" column to group boxplots: fill=condition in aes().
ggplot(aes(protein, level, fill=condition)) +
geom_boxplot(outlier.size = 0.5) +
scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
xlab("") +
ylab("log2 expression level") +
ggtitle("AD vs Control top 25 significant proteins (Number of samples: AD = 84, Control = 31)") +
theme_light() +
theme(plot.title = element_text(size = 16, face = "bold"))
## Warning: Removed 322 rows containing non-finite values (stat_boxplot).
ggsave("./figs_prot/boxplot_ad_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in")
## Warning: Removed 322 rows containing non-finite values (stat_boxplot).
psp_vs_control_names <- dat_fc_pv %>%
filter(psp_control_pv <= pv_cut & abs(fc_control_psp) >= fc_cut) %>%
arrange(-abs(fc_control_psp)) %>%
slice_head(n=25) %>%
dplyr::select(ProteinIDs, PIDshort)
pvc <- cra.log[psp_vs_control_names$ProteinIDs, ] %>% t %>% data.frame
colnames(pvc) <- psp_vs_control_names$PIDshort
pvc$condition <- NA
pvc[control_cols, ]$condition <- "Control"
pvc[ad_cols, ]$condition <- "AD"
pvc[psp_cols, ]$condition <- "PSP"
pvc %>% filter(condition != "AD") %>%
pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
mutate(conditions = factor(condition, levels = c("PSP", "Control"))) %>%
ggplot(aes(protein, level, fill=conditions)) +
geom_boxplot(outlier.size = 0.5) +
scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
xlab("") +
ylab("log2 expression level") +
ggtitle("PSP vs Control top 25 significant proteins (Number of samples: PSP = 85, Control = 31)") +
theme_light() +
theme(plot.title = element_text(size = 16, face = "bold"))
## Warning: Removed 346 rows containing non-finite values (stat_boxplot).
ggsave("./figs_prot/boxplot_psp_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in")
## Warning: Removed 346 rows containing non-finite values (stat_boxplot).
psp_vs_ad_names <- dat_fc_pv %>%
filter(abs(fc_ad_psp) >= fc_cut & ad_psp_pv <= pv_cut ) %>%
arrange(-abs(fc_ad_psp)) %>%
slice_head(n=25) %>%
dplyr::select(ProteinIDs, PIDshort)
pva <- cra.log[psp_vs_ad_names$ProteinIDs, ] %>% t %>% data.frame
## %>% t %>% data.frame
colnames(pva) <- psp_vs_ad_names$PIDshort
pva$condition <- NA
pva[control_cols, ]$condition <- "Control"
pva[ad_cols, ]$condition <- "AD"
pva[psp_cols, ]$condition <- "PSP"
pva %>% filter(condition != "Control") %>%
pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
ggplot(aes(protein, level, fill=condition)) +
geom_boxplot(outlier.size = 0.5) +
scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
xlab("") +
ylab("log2 expression level") +
ggtitle("PSP vs AD top 25 significant proteins (Number of samples: AD = 84, PSP = 85)") +
theme_light() +
theme(plot.title = element_text(size = 16, face = "bold"))
## Warning: Removed 392 rows containing non-finite values (stat_boxplot).
ggsave("./figs_prot/boxplot_ad_vs_psp.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in")
## Warning: Removed 392 rows containing non-finite values (stat_boxplot).
d 167 rows containing non-finite values (stat_boxplot).
transcripts <- read.delim("./data_transcr/MayoRNAseq_RNAseq_TCX_transcriptCounts.tsv",
stringsAsFactors = FALSE) %>%
`rownames<-`(.$ensembl_id)
ensembl_id <- data.frame(transcripts[1])
transcripts <- transcripts[-1]
We form a table with the names of gene samples, RNA and diagnosis.
## transcripts_norm %>% names %>% sub("^.+_([0-9]+)_.+", "\\1", .)
names(transcripts) <- transcripts %>%
names %>%
sub("^X([0-9]+_.+)", "\\1", .)
id_keys_raw <- read.csv("./data_transcr/Mayo_Proteomics_ID_key.csv",
strip.white = TRUE,
header = TRUE,
stringsAsFactors = FALSE) %>%
na.omit %>%
dplyr::select(1:2)
id_keys <- read.csv("./data_transcr/Mayo_Proteomics_TC_traits.csv",
strip.white = TRUE,
header = TRUE,
stringsAsFactors = FALSE) %>%
slice_head(n=200) %>%
dplyr::select(c(1,4)) %>%
cbind.data.frame(., id_keys_raw) %>%
dplyr::select(c(1,4,2)) %>%
filter(Samples_Simple != "b1_091_20b")
rm(id_keys_raw)
samples_in_common <- intersect(names(transcripts),
id_keys$RNA_SampleID)
We leave only lines with samples in common.
id_keys <- id_keys %>%
filter(RNA_SampleID %in% samples_in_common)
Selecting only relevant columns from the data.
transcripts <- transcripts %>%
dplyr::select(all_of(id_keys$RNA_SampleID))
We get and save the column numbers for each of the conditions (AD, PSP, Control).
control_cols <- id_keys %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "Control") %>%
.$n
ad_cols <- id_keys %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "AD") %>%
.$n
psp_cols <- id_keys %>%
mutate(n = rownames(.) %>% as.numeric) %>%
filter(Diagnosis == "PSP") %>%
.$n
Genes with very low counts across all libraries provide little evidence for differential expression.
To filter out lowly expressed genes, we filtering on a minimum counts per million threshold present in at least 31 samples. 31 represents the smallest sample size (in Control group) for each group in our experiment. We choose to retain genes if they are expressed at a counts-per-million (CPM) above 1 in at least 31 samples.
We’ll use the cpm function from the edgeR library to generate the CPM
values. By converting to CPMs we are normalising for the different
sequencing depths for each sample.
# Obtain CPMs
transcripts_cpm <- cpm(transcripts)
# Have a look at the output
head(transcripts_cpm)[,1:5]
## 7095_TCX 1019_TCX 1137_TCX 1036_TCX 1010_TCX
## ENST00000000233 28.24212 56.10630 105.82034 67.82287 67.92063
## ENST00000000412 2.14055 4.98927 4.44295 4.16509 4.36092
## ENST00000000442 0.08737 0.09183 0.09006 0.12015 0.10445
## ENST00000001008 19.28677 91.76579 92.88174 140.55173 85.91268
## ENST00000001146 2.03134 9.15209 33.74243 18.54266 7.78176
## ENST00000002125 0.00000 0.06122 0.09006 0.04005 0.02611
A threshold of 1 CPM in at least minimum group sample size is a good rule of thumb.
# Which values in myCPM are greater than 0.5?
thresh <- transcripts_cpm > 1
# This produces a logical matrix with TRUEs and FALSEs
head(thresh)[,1:5]
## 7095_TCX 1019_TCX 1137_TCX 1036_TCX 1010_TCX
## ENST00000000233 TRUE TRUE TRUE TRUE TRUE
## ENST00000000412 TRUE TRUE TRUE TRUE TRUE
## ENST00000000442 FALSE FALSE FALSE FALSE FALSE
## ENST00000001008 TRUE TRUE TRUE TRUE TRUE
## ENST00000001146 TRUE TRUE TRUE TRUE TRUE
## ENST00000002125 FALSE FALSE FALSE FALSE FALSE
# Summary of how many TRUEs there are in each row
# There are 11433 genes that have TRUEs in all 12 samples.
table(rowSums(thresh))
##
## 0 1 2 3 4 5 6 7 8 9 10
## 137246 5138 1747 1204 858 715 602 515 458 398 390
## 11 12 13 14 15 16 17 18 19 20 21
## 365 346 331 290 248 299 237 219 246 223 190
## 22 23 24 25 26 27 28 29 30 31 32
## 209 184 209 166 188 186 162 171 141 166 142
## 33 34 35 36 37 38 39 40 41 42 43
## 117 143 134 142 126 143 123 136 134 141 115
## 44 45 46 47 48 49 50 51 52 53 54
## 131 142 122 113 104 101 105 93 125 106 120
## 55 56 57 58 59 60 61 62 63 64 65
## 100 94 99 113 97 107 101 86 118 106 100
## 66 67 68 69 70 71 72 73 74 75 76
## 111 109 84 86 101 106 79 88 103 94 94
## 77 78 79 80 81 82 83 84 85 86 87
## 90 93 105 94 80 98 91 114 90 92 89
## 88 89 90 91 92 93 94 95 96 97 98
## 90 86 106 91 91 81 96 102 93 81 86
## 99 100 101 102 103 104 105 106 107 108 109
## 89 81 102 88 106 75 90 79 93 86 101
## 110 111 112 113 114 115 116 117 118 119 120
## 96 78 94 86 89 85 75 95 96 75 110
## 121 122 123 124 125 126 127 128 129 130 131
## 82 95 90 95 82 80 88 94 100 102 109
## 132 133 134 135 136 137 138 139 140 141 142
## 97 94 99 110 98 93 92 96 107 118 95
## 143 144 145 146 147 148 149 150 151 152 153
## 121 133 100 106 117 102 113 118 112 135 115
## 154 155 156 157 158 159 160 161 162 163 164
## 131 120 124 120 126 139 135 135 141 132 147
## 165 166 167 168 169 170 171 172 173 174 175
## 142 190 156 179 183 201 176 191 194 211 236
## 176 177 178 179 180 181 182 183 184 185 186
## 251 243 269 256 292 325 340 408 396 421 464
## 187 188 189 190 191 192 193 194 195 196 197
## 517 532 644 656 795 930 1063 1313 2013 3445 22685
# we would like to keep genes that have at least 31 TRUES in each row of thresh
keep <- rowSums(thresh) >= 31
# Subset the rows of countdata to keep the more highly expressed genes
transcripts_keep <- transcripts[keep,]
summary(keep)
## Mode FALSE TRUE
## logical 153881 54363
dim(transcripts_keep)
## [1] 54363 197
# Let's have a look and see whether our threshold of 1 does indeed correspond to a count of about 30-40
# We will look at the first sample
## plot(transcripts_cpm[,1],transcripts[,1],
## ylim=c(0,50),xlim=c(0,3))
## abline(v=1)
Next we’ll create a DGEList object. This is an object used by edgeR
to store count data. It has a number of slots for storing various
parameters about the data.
dgeObj <- DGEList(transcripts_keep)
# have a look at dgeObj
dgeObj[,1:5]
## An object of class "DGEList"
## $counts
## 7095_TCX 1019_TCX 1137_TCX 1036_TCX 1010_TCX
## ENST00000000233 1293 1833 3525 3387 2601
## ENST00000000412 98 163 148 208 167
## ENST00000001008 883 2998 3094 7019 3290
## ENST00000001146 93 299 1124 926 298
## ENST00000002165 91 556 435 918 758
## 54358 more rows ...
##
## $samples
## group lib.size norm.factors
## 7095_TCX 1 45550775 1
## 1019_TCX 1 31991405 1
## 1137_TCX 1 32772147 1
## 1036_TCX 1 49196988 1
## 1010_TCX 1 37712160 1
We can look at a few different plots to check that the data is good quality.
Check how many reads we have for each sample in the dgeObj.
dgeObj$samples$lib.size
## [1] 45550775 31991405 32772147 49196988 37712160 30090126 35546996 66964691
## [9] 37161623 36941520 52712105 47641750 40568238 34014083 35280111 39318949
## [17] 42250545 40768654 43110918 35437439 38756019 36919775 39396416 37576727
## [25] 53457952 39096885 48610519 48500575 47069216 38149904 48966776 65230041
## [33] 40866245 39182880 36113104 55386798 36813570 41440955 41602120 30661837
## [41] 39852497 44509078 37017685 34774158 43782636 52191604 44053451 53062467
## [49] 71336831 53478854 40339548 38057041 40970127 41058288 41986088 40102188
## [57] 44425361 34548532 33678531 30455478 36325005 37520429 45039493 35615322
## [65] 52198010 40447856 40235904 61684913 50005587 37795709 45376671 38364056
## [73] 37013042 86669819 50845067 38775710 38862319 54822972 33099333 44024426
## [81] 47236330 38231326 40137081 32839797 39832435 44608859 44001175 41780728
## [89] 50979762 48105520 40766437 37050055 50424532 40393128 37745480 33890328
## [97] 46091083 43491393 59223517 42756890 42241034 51544473 49324578 42474170
## [105] 40672703 41635851 40868376 33838611 40849880 36383401 52890008 37009451
## [113] 57004673 43353962 42141625 57860670 37061874 43585299 38661440 43606840
## [121] 53672440 36449713 67021400 36207240 43826555 45620641 44653077 35162204
## [129] 35296146 31671760 40000259 54625935 37062573 31831658 39451368 33574059
## [137] 39877799 35192796 36215395 37830907 37437957 41985777 38319939 40262098
## [145] 47492240 59190811 52264681 37924736 46373214 42023695 31880881 52914487
## [153] 41690501 42190452 55767469 39637985 46754624 59240719 39735264 36314715
## [161] 39302145 44735347 35900319 45308074 39357966 40745343 36977404 48982614
## [169] 42860677 33780491 46846071 50115573 40692380 58390293 43829550 34106422
## [177] 33802029 48618773 37992006 47333444 46674794 55557091 35428577 39737316
## [185] 52865317 43324852 42344695 43715703 33518065 44840057 41127482 38817917
## [193] 54159083 64033266 47061409 35876223 47488721
Plot the library sizes as a barplot to see whether there are any major discrepanies between the samples more easily.
png("./figs_transcr/barplot_sample_sizes.png", width = 2000, height = 2000, res = 300)
# The names argument tells the barplot to use the sample names on the x-axis
# The las argument rotates the axis names
barplot(dgeObj$samples$lib.size, names=colnames(dgeObj), las=2, cex.names = .5)
# Add a title to the plot
## title("Barplot of library sizes")
title(paste("Barplot of library sizes\n", "Number of samples: AD = 84, PSP = 85, Control = 31"))
dev.off()
## png
## 2
Count data is not normally distributed, so if we want to examine the
distributions of the raw counts we need to log the counts. Next we’ll
use box plots to check the distribution of the read counts on the log2
scale. We can use the cpm function to get log2 counts per million,
which are corrected for the different library sizes. The cpm function
also adds a small offset to avoid taking log of zero.
# Get log2 counts per million
logcounts <- cpm(dgeObj, log = TRUE)
# Check distributions of samples using boxplot
png("./figs_transcr/boxplot_samples_unnorm.png", width = 4000, height = 2000, res = 300)
boxplot(logcounts, xlab="", ylab="Log2 counts per million", las=2, cex.axis=.4, outcex=.1)
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(logcounts),col="blue")
title("Boxplots of logCPMs (unnormalised). Number of samples: AD = 82, PSP = 84, Control = 31")
dev.off()
## png
## 2
From the boxplots we see that overall the density distributions of raw log-intensities are not identical but still not very different.
We use the plotMDS function to create the MDS plot. We colour the
samples according to the grouping information.
levels(id_keys$Diagnosis %>% as.factor)
## [1] "AD" "Control" "PSP"
col.condition <- c("red", "green", "purple")[id_keys$Diagnosis %>% as.factor]
png("./figs_transcr/mds_plot.png", width = 2000, height = 2000, res = 300)
plotMDS(dgeObj, col=col.condition)
legend("topleft", fill=c("red", "green", "purple"), legend = levels(id_keys$Diagnosis %>% as.factor))
# Add a title
title("MDS plot. Number of samples: AD = 82, PSP = 84, Control = 31")
dev.off()
## png
## 2
The trimmed mean of M-values normalization method (TMM) is performed
to eliminate composition biases between libraries. This generates a
set of normalization factors, where the product of these factors and
the library sizes defines the effective library size. The
calcNormFactors function in edgeR calculates the normalization factors
between libraries. TMM normalisation (and most scaling normalisation
methods) scale relative to one sample.
# Apply normalisation to DGEList object
dgeObj <- calcNormFactors(dgeObj)
This will update the normalisation factors in the DGEList object
(their default values are 1).
dgeObj$samples %>% head
## group lib.size norm.factors
## 7095_TCX 1 45550775 0.2836
## 1019_TCX 1 31991405 1.1594
## 1137_TCX 1 32772147 1.0841
## 1036_TCX 1 49196988 1.0406
## 1010_TCX 1 37712160 1.0552
## 871_TCX 1 30090126 1.0234
The normalization factors multiply to unity across all libraries. A normalization factor below one indicates that the library size will be scaled down, as there is more suppression (i.e., composition bias) in that library relative to the other libraries. This is also equivalent to scaling the counts upwards in that sample. Conversely, a factor above one scales up the library size and is equivalent to downscaling the counts.
The first two samples have very different normalisation factors. If we
plot mean difference plots using the plotMD function for these
samples, we see the composition bias problem. We will use the
logcounts, which have been normalised for library size, but not for
composition bias.
png("./figs_transcr/mean_difference.png", width = 2000, height = 2000, res = 300)
par(mfrow=c(1,2))
plotMD(logcounts, column = 1)
abline(h=0,col="grey")
plotMD(logcounts,column = 2)
abline(h=0,col="grey")
dev.off()
## png
## 2
The mean-difference plots show average expression (mean: x-axis)
against log-fold-changes (difference: y-axis). Because our DGEList
object contains the normalisation factors, plots using dgeObj, show
the composition bias problem has been solved.
png("./figs_transcr/mean_difference_2.png", width = 2000, height = 2000, res = 300)
par(mfrow=c(1,2))
plotMD(dgeObj,column = 1)
abline(h=0,col="grey")
plotMD(dgeObj,column = 2)
abline(h=0,col="grey")
dev.off()
## png
## 2
## save(group, dgeObj,sampleinfo,file="./Robjects/preprocessing.Rdata")
diagnosis <- as.character(id_keys$Diagnosis)
design <- model.matrix( ~ diagnosis + 0)
## plotMDS(dgeObj, labels=diagnosis, cex=0.75, xlim=c(-4, 5))
The common dispersion estimates the overall BCV of the dataset, averaged over all genes:
dgeObj <- estimateCommonDisp(dgeObj)
Then we estimate gene-wise dispersion estimates, allowing a possible trend with averge count size:
dgeObj <- estimateGLMTrendedDisp(dgeObj)
dgeObj <- estimateTagwiseDisp(dgeObj)
Plot the estimated dispersions:
png("./figs_transcr/est_dispersion.png", width = 2000, height = 2000, res = 300)
plotBCV(dgeObj)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.01, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
First, we fit genewise glms:
# Fit the linear model
fit <- glmFit(dgeObj, design)
names(fit)
## [1] "coefficients" "fitted.values" "deviance"
## [4] "method" "counts" "unshrunk.coefficients"
## [7] "df.residual" "design" "offset"
## [10] "dispersion" "prior.count" "samples"
## [13] "prior.df" "AveLogCPM"
head(coef(fit))
## diagnosisAD diagnosisControl diagnosisPSP
## ENST00000000233 -9.658 -9.327 -9.516
## ENST00000000412 -12.336 -12.295 -12.362
## ENST00000001008 -9.144 -9.235 -8.912
## ENST00000001146 -11.217 -11.193 -10.772
## ENST00000002165 -10.919 -11.137 -11.082
## ENST00000002596 -12.246 -11.869 -12.035
Conduct likelihood ratio tests for luminal vs basal and show the top genes:
Build a contrast AD vs Control
AvsC <- makeContrasts(diagnosisAD - diagnosisControl, levels=design)
lrt.AvsC <- glmLRT(fit, contrast=AvsC)
## topTags(lrt.AvsC, sort.by = "logFC", p.value = 0.05, n = 61)
Save list of gene IDs for transcripts.
## gene_ids_tr <- lrt.AvsC %>%
## rownames %>%
## gconvert(organism="hsapiens", target="ENSG", filter_na = FALSE) %>%
## dplyr::select("GeneID" = name)
## saveRDS(gene_ids_tr, "./results_transcr/gene_ids_tr.rds")
gene_ids_tr <- readRDS("./results_transcr/gene_ids_tr.rds")
## Set log-fold cutoff for transcripts
fc_cut_tr <- 2
ad_vs_control_top_tr <- topTags(lrt.AvsC, sort.by = "logFC", p.value = 0.05, n = 100) %>%
data.frame %>%
filter(abs(logFC) >= fc_cut_tr) %>%
rownames_to_column(var = "TranscriptIDs")
AD_vs_Control_GeneIDs <- ad_vs_control_top_tr$TranscriptIDs %>%
noquote %>%
gconvert(organism="hsapiens", target="ENSG") %>%
dplyr::select("GeneID" = name)
## write.csv(ad_vs_control_prot, "./results/prot_ad_vs_control.csv")
write.csv(data.frame(AD_vs_Control_GeneIDs, ad_vs_control_top_tr),
"./results_transcr/ad_vs_control_tr.csv")
knitr::kable(data.frame(AD_vs_Control_GeneIDs, ad_vs_control_top_tr),
caption = "Top DE transcripts: AD vs Control")
| GeneID | TranscriptIDs | logFC | logCPM | LR | PValue | FDR |
|---|---|---|---|---|---|---|
| FER1L6 | ENST00000522917 | 3.731 | 0.6279 | 29.43 | 0 | 0 |
| MTCO2P12 | ENST00000427426 | -3.572 | 3.5340 | 80.84 | 0 | 0 |
| BMP5 | ENST00000370830 | 3.521 | 1.1730 | 33.40 | 0 | 0 |
| SCARA5 | ENST00000380385 | 3.452 | 0.6688 | 34.61 | 0 | 0 |
| SLC47A1 | ENST00000436810 | 3.397 | 0.6401 | 41.97 | 0 | 0 |
| CD177 | ENST00000618265 | 3.392 | 0.3137 | 43.55 | 0 | 0 |
| FAT2 | ENST00000261800 | -3.386 | 1.1699 | 89.19 | 0 | 0 |
| SCARA5 | ENST00000354914 | 3.322 | 1.7634 | 32.97 | 0 | 0 |
| OGN | ENST00000262551 | 3.223 | 3.3344 | 31.91 | 0 | 0 |
| SLPI | ENST00000338380 | 2.955 | 2.3328 | 25.58 | 0 | 0 |
| CYP1B1 | ENST00000494864 | 2.890 | 1.0422 | 48.74 | 0 | 0 |
| IL1R2 | ENST00000332549 | 2.849 | -0.7876 | 34.76 | 0 | 0 |
| CD163 | ENST00000396620 | 2.833 | -0.2048 | 30.21 | 0 | 0 |
| SIX2 | ENST00000303077 | 2.786 | 0.0841 | 28.12 | 0 | 0 |
| CYP1B1 | ENST00000614273 | 2.748 | -0.1752 | 45.53 | 0 | 0 |
| HSPA6 | ENST00000309758 | -2.714 | 3.6476 | 32.50 | 0 | 0 |
| STEAP4 | ENST00000380079 | 2.674 | 1.0209 | 33.91 | 0 | 0 |
| GRM4 | ENST00000609973 | -2.643 | 0.3437 | 69.93 | 0 | 0 |
| DSP | ENST00000379802 | 2.639 | 3.2340 | 40.10 | 0 | 0 |
| CBLN3 | ENST00000267406 | -2.628 | 2.2207 | 82.78 | 0 | 0 |
| IGFBP5 | ENST00000449583 | 2.569 | 1.1627 | 54.59 | 0 | 0 |
| SLC13A4 | ENST00000491630 | 2.544 | -0.0148 | 21.67 | 0 | 0 |
| IGFBP5 | ENST00000486341 | 2.510 | 0.5922 | 53.62 | 0 | 0 |
| RGS1 | ENST00000474373 | 2.503 | 0.2359 | 26.97 | 0 | 0 |
| S100A12 | ENST00000368737 | 2.486 | 0.3891 | 26.14 | 0 | 0 |
| SELE | ENST00000367775 | -2.446 | 0.9562 | 32.06 | 0 | 0 |
| SERPIND1 | ENST00000215727 | 2.429 | 0.2412 | 35.80 | 0 | 0 |
| FMOD | ENST00000493296 | 2.425 | -0.1835 | 38.60 | 0 | 0 |
| GRM4 | ENST00000535756 | -2.408 | 2.8107 | 76.04 | 0 | 0 |
| ZIC4 | ENST00000472749 | -2.400 | 0.6030 | 52.99 | 0 | 0 |
| CD163 | ENST00000539632 | 2.358 | 0.1737 | 23.29 | 0 | 0 |
| CD163 | ENST00000538840 | 2.358 | 0.1730 | 23.27 | 0 | 0 |
| CD44 | ENST00000278386 | 2.302 | 1.4183 | 34.36 | 0 | 0 |
| DSP | ENST00000418664 | 2.298 | 2.6612 | 34.63 | 0 | 0 |
| AC093330.1 | ENST00000577364 | -2.261 | 2.4077 | 73.70 | 0 | 0 |
| CD44 | ENST00000526669 | 2.236 | 0.6769 | 32.71 | 0 | 0 |
| ZIC4 | ENST00000383075 | -2.212 | 0.9448 | 47.38 | 0 | 0 |
| VNN2 | ENST00000418593 | 2.206 | -0.6440 | 36.21 | 0 | 0 |
| CBLN1 | ENST00000219197 | -2.204 | 2.5834 | 68.32 | 0 | 0 |
| KRT31 | ENST00000251645 | -2.189 | 0.1656 | 67.92 | 0 | 0 |
| MPZL2 | ENST00000527282 | 2.185 | -0.5619 | 37.78 | 0 | 0 |
| TMEM30B | ENST00000555868 | 2.177 | 0.7573 | 24.04 | 0 | 0 |
| MTRNR2L8 | ENST00000536684 | -2.175 | 1.8293 | 71.31 | 0 | 0 |
| SLC16A12 | ENST00000371790 | 2.158 | 0.4949 | 28.82 | 0 | 0 |
| EMP1 | ENST00000542289 | 2.154 | 1.5059 | 49.70 | 0 | 0 |
| NDNF | ENST00000379692 | 2.123 | 0.8420 | 45.68 | 0 | 0 |
| PFKP | ENST00000460445 | 2.116 | 0.0379 | 51.21 | 0 | 0 |
| AQP4 | ENST00000578776 | 2.110 | 3.5511 | 50.64 | 0 | 0 |
| AQP4 | ENST00000383170 | 2.097 | 4.1627 | 50.88 | 0 | 0 |
| EMP1 | ENST00000431267 | 2.093 | 0.3996 | 48.27 | 0 | 0 |
| DSG2 | ENST00000261590 | 2.090 | -0.1590 | 26.88 | 0 | 0 |
| PHLDB2 | ENST00000393923 | 2.086 | 2.2024 | 40.22 | 0 | 0 |
| FCGR3B | ENST00000367964 | 2.074 | -0.7721 | 31.47 | 0 | 0 |
| LEPR | ENST00000371059 | 2.060 | 2.4428 | 41.95 | 0 | 0 |
| CD163 | ENST00000359156 | 2.051 | 4.0504 | 20.36 | 0 | 0 |
| SERPINA5 | ENST00000553780 | 2.051 | 0.1347 | 31.61 | 0 | 0 |
| AQP4 | ENST00000584088 | 2.048 | 1.6928 | 50.14 | 0 | 0 |
| SPTLC3 | ENST00000399002 | 2.047 | 2.0217 | 27.60 | 0 | 0 |
| NPNT | ENST00000514837 | 2.037 | 0.4600 | 62.83 | 0 | 0 |
| FAM20A | ENST00000226094 | 2.033 | 0.6134 | 28.98 | 0 | 0 |
| CD44 | ENST00000434472 | 2.012 | -0.0146 | 29.46 | 0 | 0 |
We get normalized data with cpm(dgeObj, normalized.lib.sizes = TRUE) for the top AD vs Control transcripts.
all_samples_ad_vs_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(ad_cols)) %>%
`rownames<-`(AD_vs_Control_GeneIDs$GeneID %>% make.unique)
Build a contrast PSP vs Control
PvsC <- makeContrasts(diagnosisPSP - diagnosisControl, levels=design)
lrt.PvsC <- glmLRT(fit, contrast=PvsC)
## topTags(lrt.PvsC, sort.by = "logFC", p.value = 0.05, n = 25)
## fold-change cutoff here is 1.9 to make the numger of transcripts
## close to the number of proteins
psp_vs_control_top_tr <- topTags(lrt.PvsC, sort.by = "logFC", p.value = 0.05, n = 200) %>%
data.frame %>%
filter(abs(logFC) >= 1.9) %>%
rownames_to_column(var = "TranscriptIDs")
PSP_vs_Control_GeneIDs <- psp_vs_control_top_tr$TranscriptIDs %>%
noquote %>%
gconvert(organism="hsapiens", target="ENSG") %>%
dplyr::select("GeneID" = name)
write.csv(data.frame(PSP_vs_Control_GeneIDs, psp_vs_control_top_tr),
"./results_transcr/psp_vs_control_tr.csv")
knitr::kable(data.frame(PSP_vs_Control_GeneIDs, psp_vs_control_top_tr),
caption = "Top DE transcripts: PSP vs Control")
| GeneID | TranscriptIDs | logFC | logCPM | LR | PValue | FDR |
|---|---|---|---|---|---|---|
| COL3A1 | ENST00000304636 | 3.856 | 2.3556 | 37.78 | 0 | 0e+00 |
| TGFBI | ENST00000442011 | 3.650 | 2.9260 | 37.31 | 0 | 0e+00 |
| MARCO | ENST00000327097 | 3.364 | 0.6494 | 31.49 | 0 | 0e+00 |
| COL1A1 | ENST00000225964 | 3.341 | 2.6313 | 32.87 | 0 | 0e+00 |
| ZIC4 | ENST00000472749 | -3.216 | 0.6030 | 96.46 | 0 | 0e+00 |
| ZIC4 | ENST00000383075 | -3.050 | 0.9448 | 91.79 | 0 | 0e+00 |
| CD163 | ENST00000396620 | 2.968 | -0.2048 | 32.68 | 0 | 0e+00 |
| HMOX1 | ENST00000412893 | 2.921 | 2.2612 | 37.14 | 0 | 0e+00 |
| FAT2 | ENST00000261800 | -2.784 | 1.1699 | 61.55 | 0 | 0e+00 |
| SELE | ENST00000367775 | -2.766 | 0.9562 | 41.67 | 0 | 0e+00 |
| RGS1 | ENST00000474373 | 2.763 | 0.2359 | 31.89 | 0 | 0e+00 |
| CBLN3 | ENST00000267406 | -2.664 | 2.2207 | 86.27 | 0 | 0e+00 |
| IFI30 | ENST00000597802 | 2.660 | 0.9885 | 30.06 | 0 | 0e+00 |
| CD163 | ENST00000539632 | 2.537 | 0.1737 | 26.42 | 0 | 0e+00 |
| CD163 | ENST00000538840 | 2.536 | 0.1730 | 26.40 | 0 | 0e+00 |
| MSR1 | ENST00000262101 | 2.523 | 1.5579 | 27.73 | 0 | 0e+00 |
| ZIC1 | ENST00000282928 | -2.370 | 2.4326 | 69.10 | 0 | 0e+00 |
| ZIC1 | ENST00000472523 | -2.317 | 2.6913 | 67.03 | 0 | 0e+00 |
| GRM4 | ENST00000609973 | -2.307 | 0.3437 | 53.80 | 0 | 0e+00 |
| COL6A3 | ENST00000295550 | 2.280 | 0.8936 | 27.34 | 0 | 0e+00 |
| AC093330.1 | ENST00000577364 | -2.210 | 2.4077 | 71.21 | 0 | 0e+00 |
| GRM4 | ENST00000535756 | -2.152 | 2.8107 | 61.19 | 0 | 0e+00 |
| CD163 | ENST00000359156 | 2.141 | 4.0504 | 22.00 | 0 | 0e+00 |
| LIF | ENST00000249075 | 2.128 | 0.1906 | 17.03 | 0 | 1e-04 |
| MTCO3P12 | ENST00000416718 | -2.094 | 2.8876 | 75.96 | 0 | 0e+00 |
| CD68 | ENST00000584502 | 2.074 | 0.8735 | 30.40 | 0 | 0e+00 |
| AL359091.1 | ENST00000608502 | -2.023 | 4.2662 | 61.92 | 0 | 0e+00 |
| MTND2P28 | ENST00000457540 | -2.010 | 8.5698 | 56.98 | 0 | 0e+00 |
| KRT31 | ENST00000251645 | -2.005 | 0.1656 | 57.44 | 0 | 0e+00 |
| MTRNR2L8 | ENST00000536684 | -1.992 | 1.8293 | 60.24 | 0 | 0e+00 |
| HMOX1 | ENST00000216117 | 1.981 | 4.0728 | 22.88 | 0 | 0e+00 |
| CERCAM | ENST00000613052 | -1.975 | 3.8814 | 65.14 | 0 | 0e+00 |
| MOBP | ENST00000452959 | -1.973 | 8.5285 | 64.12 | 0 | 0e+00 |
| HBA2 | ENST00000482565 | -1.954 | 1.9636 | 49.47 | 0 | 0e+00 |
| HBA2 | ENST00000251595 | -1.954 | 1.9635 | 49.47 | 0 | 0e+00 |
| MTRNR2L12 | ENST00000600213 | -1.942 | 3.8243 | 63.04 | 0 | 0e+00 |
| MBP | ENST00000397865 | -1.939 | 9.9753 | 63.93 | 0 | 0e+00 |
| CBLN1 | ENST00000219197 | -1.937 | 2.5834 | 52.98 | 0 | 0e+00 |
| ZIC1 | ENST00000488404 | -1.932 | 1.4645 | 47.00 | 0 | 0e+00 |
| MOBP | ENST00000424090 | -1.931 | 4.6079 | 59.86 | 0 | 0e+00 |
| MBP | ENST00000397875 | -1.931 | 9.9826 | 64.07 | 0 | 0e+00 |
| MBP | ENST00000490319 | -1.922 | 10.0626 | 63.70 | 0 | 0e+00 |
| MBP | ENST00000585201 | -1.905 | 10.0999 | 63.43 | 0 | 0e+00 |
| AC026691.1 | ENST00000602502 | -1.902 | 2.6420 | 70.87 | 0 | 0e+00 |
We get normalized data with cpm(dgeObj, normalized.lib.sizes = TRUE) for the top AD vs Control transcripts.
all_samples_psp_vs_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% psp_vs_control_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(psp_cols)) %>%
`rownames<-`(PSP_vs_Control_GeneIDs$GeneID %>% make.unique)
Build a contrast AD vs PSP
AvsP <- makeContrasts(diagnosisAD - diagnosisPSP, levels=design)
lrt.AvsP <- glmLRT(fit, contrast=AvsP)
## topTags(lrt.AvsP, sort.by = "logFC", p.value = 0.05, n = 25)
## fold-change cutoff here is 1 to make the numger of transcripts
## close to the number of proteins
ad_vs_psp_top_tr <- topTags(lrt.AvsP, sort.by = "logFC", p.value = 0.05, n = 500) %>%
data.frame %>%
filter(abs(logFC) >= 1) %>%
rownames_to_column(var = "TranscriptIDs")
AD_vs_PSP_GeneIDs <- ad_vs_psp_top_tr$TranscriptIDs %>%
noquote %>%
gconvert(organism="hsapiens", target="ENSG", filter_na = FALSE) %>%
dplyr::select("GeneID" = name)
## write.csv(ad_vs_psp_prot, "./results/prot_ad_vs_psp.csv")
write.csv(data.frame(AD_vs_PSP_GeneIDs, ad_vs_psp_top_tr),
"./results_transcr/ad_vs_psp_tr.csv")
knitr::kable(data.frame(AD_vs_PSP_GeneIDs, ad_vs_psp_top_tr),
caption = "Top DE transcripts: AD vs PSP")
| GeneID | TranscriptIDs | logFC | logCPM | LR | PValue | FDR |
|---|---|---|---|---|---|---|
| AC091057.1 | ENST00000602616 | -2.265 | 1.6246 | 51.810 | 0.0000 | 0.0000 |
| FER1L6 | ENST00000522917 | 2.173 | 0.6279 | 27.169 | 0.0000 | 0.0000 |
| TGFBI | ENST00000442011 | -2.163 | 2.9260 | 34.882 | 0.0000 | 0.0000 |
| LTF | ENST00000231751 | 2.057 | 0.4877 | 40.320 | 0.0000 | 0.0000 |
| SCARA5 | ENST00000354914 | 2.024 | 1.7634 | 31.963 | 0.0000 | 0.0000 |
| SCARA5 | ENST00000380385 | 1.989 | 0.6688 | 30.637 | 0.0000 | 0.0000 |
| CD177 | ENST00000618265 | 1.976 | 0.3137 | 39.434 | 0.0000 | 0.0000 |
| COL1A1 | ENST00000225964 | -1.921 | 2.6313 | 28.277 | 0.0000 | 0.0000 |
| COL3A1 | ENST00000304636 | -1.894 | 2.3556 | 25.416 | 0.0000 | 0.0000 |
| HSPA6 | ENST00000309758 | -1.867 | 3.6476 | 22.707 | 0.0000 | 0.0000 |
| S100A12 | ENST00000368737 | 1.825 | 0.3891 | 33.761 | 0.0000 | 0.0000 |
| BMP5 | ENST00000370830 | 1.802 | 1.1730 | 23.794 | 0.0000 | 0.0000 |
| MTCO2P12 | ENST00000427426 | -1.796 | 3.5340 | 30.611 | 0.0000 | 0.0000 |
| SLC47A1 | ENST00000436810 | 1.787 | 0.6401 | 31.157 | 0.0000 | 0.0000 |
| HMOX1 | ENST00000412893 | -1.741 | 2.2612 | 32.929 | 0.0000 | 0.0000 |
| IL1R2 | ENST00000332549 | 1.685 | -0.7876 | 31.052 | 0.0000 | 0.0000 |
| MARCO | ENST00000327097 | -1.649 | 0.6494 | 20.184 | 0.0000 | 0.0001 |
| IFI30 | ENST00000597802 | -1.633 | 0.9885 | 27.524 | 0.0000 | 0.0000 |
| CXCL10 | ENST00000306602 | 1.632 | 0.0206 | 26.084 | 0.0000 | 0.0000 |
| SLPI | ENST00000338380 | 1.617 | 2.3328 | 19.556 | 0.0000 | 0.0001 |
| HMOX1 | ENST00000216117 | -1.564 | 4.0728 | 31.726 | 0.0000 | 0.0000 |
| SIX2 | ENST00000303077 | 1.549 | 0.0841 | 21.966 | 0.0000 | 0.0000 |
| TMEM30B | ENST00000555868 | 1.546 | 0.7573 | 28.188 | 0.0000 | 0.0000 |
| OGN | ENST00000262551 | 1.522 | 3.3344 | 18.888 | 0.0000 | 0.0001 |
| STEAP4 | ENST00000380079 | 1.506 | 1.0209 | 26.854 | 0.0000 | 0.0000 |
| SPTLC3 | ENST00000399002 | 1.478 | 2.0217 | 32.882 | 0.0000 | 0.0000 |
| None | ENST00000320354 | 1.474 | 0.2405 | 31.598 | 0.0000 | 0.0000 |
| SFN | ENST00000339276 | 1.469 | 0.8425 | 19.689 | 0.0000 | 0.0001 |
| CLIC6 | ENST00000360731 | 1.464 | -0.0853 | 24.822 | 0.0000 | 0.0000 |
| IFI30 | ENST00000407280 | -1.419 | 2.7254 | 26.052 | 0.0000 | 0.0000 |
| CRH | ENST00000276571 | -1.417 | 0.6056 | 32.910 | 0.0000 | 0.0000 |
| NPNT | ENST00000379987 | 1.402 | 1.6386 | 68.243 | 0.0000 | 0.0000 |
| DSP | ENST00000379802 | 1.397 | 3.2340 | 27.846 | 0.0000 | 0.0000 |
| SST | ENST00000287641 | -1.364 | 3.3812 | 42.191 | 0.0000 | 0.0000 |
| ECM2 | ENST00000444490 | 1.356 | 1.9907 | 40.702 | 0.0000 | 0.0000 |
| SLC47A1 | ENST00000497548 | 1.349 | 0.4482 | 34.871 | 0.0000 | 0.0000 |
| NPNT | ENST00000514837 | 1.347 | 0.4600 | 63.942 | 0.0000 | 0.0000 |
| SLC47A1 | ENST00000270570 | 1.337 | 0.5090 | 35.019 | 0.0000 | 0.0000 |
| SLC47A1 | ENST00000575023 | 1.335 | 0.4090 | 34.526 | 0.0000 | 0.0000 |
| DSP | ENST00000418664 | 1.331 | 2.6612 | 27.613 | 0.0000 | 0.0000 |
| SLC22A6 | ENST00000540654 | 1.329 | -0.0151 | 31.742 | 0.0000 | 0.0000 |
| SLC13A4 | ENST00000491630 | 1.328 | -0.0148 | 14.574 | 0.0001 | 0.0007 |
| SLC47A1 | ENST00000581558 | 1.327 | 0.3578 | 34.393 | 0.0000 | 0.0000 |
| PIRT | ENST00000580256 | 1.326 | 2.2798 | 40.411 | 0.0000 | 0.0000 |
| PNOC | ENST00000301908 | -1.326 | -0.3262 | 45.834 | 0.0000 | 0.0000 |
| VGF | ENST00000611537 | -1.316 | 3.5267 | 49.392 | 0.0000 | 0.0000 |
| IGF2 | ENST00000381395 | 1.305 | 3.2840 | 30.428 | 0.0000 | 0.0000 |
| None | ENST00000611099 | 1.303 | 3.0742 | 35.441 | 0.0000 | 0.0000 |
| VGF | ENST00000249330 | -1.298 | 3.5811 | 49.119 | 0.0000 | 0.0000 |
| None | ENST00000300632 | 1.296 | 3.1619 | 30.406 | 0.0000 | 0.0000 |
| NPNT | ENST00000305572 | 1.291 | 0.9332 | 70.545 | 0.0000 | 0.0000 |
| AL355974.1 | ENST00000618896 | 1.286 | -0.5409 | 31.485 | 0.0000 | 0.0000 |
| ECM2 | ENST00000344604 | 1.283 | 0.2427 | 41.634 | 0.0000 | 0.0000 |
| CPXM2 | ENST00000368854 | 1.279 | 0.8488 | 34.869 | 0.0000 | 0.0000 |
| VGF | ENST00000445482 | -1.276 | 3.4696 | 48.634 | 0.0000 | 0.0000 |
| IGFBP5 | ENST00000449583 | 1.275 | 1.1627 | 33.417 | 0.0000 | 0.0000 |
| PHLDB2 | ENST00000393923 | 1.271 | 2.2024 | 34.653 | 0.0000 | 0.0000 |
| PLIN1 | ENST00000300055 | 1.267 | 1.5933 | 60.119 | 0.0000 | 0.0000 |
| NPNT | ENST00000505917 | 1.258 | 1.0131 | 69.889 | 0.0000 | 0.0000 |
| APLNR | ENST00000257254 | 1.246 | 5.0432 | 33.802 | 0.0000 | 0.0000 |
| APLNR | ENST00000606794 | 1.246 | 5.0432 | 33.789 | 0.0000 | 0.0000 |
| SLC16A12 | ENST00000371790 | 1.244 | 0.4949 | 22.570 | 0.0000 | 0.0000 |
| IGFBP5 | ENST00000486341 | 1.242 | 0.5922 | 32.537 | 0.0000 | 0.0000 |
| RASSF9 | ENST00000361228 | 1.242 | -0.5991 | 32.997 | 0.0000 | 0.0000 |
| LEPR | ENST00000371059 | 1.236 | 2.4428 | 35.041 | 0.0000 | 0.0000 |
| FCGBP | ENST00000616721 | -1.236 | 4.5363 | 20.114 | 0.0000 | 0.0001 |
| TXNIP | ENST00000488537 | 1.233 | 4.8761 | 56.977 | 0.0000 | 0.0000 |
| TBX15 | ENST00000207157 | 1.228 | -0.0383 | 40.492 | 0.0000 | 0.0000 |
| ZIC1 | ENST00000488404 | 1.210 | 1.4645 | 28.567 | 0.0000 | 0.0000 |
| VNN2 | ENST00000418593 | 1.195 | -0.6440 | 25.795 | 0.0000 | 0.0000 |
| THSD4 | ENST00000357769 | 1.181 | -0.1266 | 36.314 | 0.0000 | 0.0000 |
| DSG2 | ENST00000261590 | 1.176 | -0.1590 | 19.958 | 0.0000 | 0.0001 |
| TXNIP | ENST00000486597 | 1.174 | 2.9089 | 53.940 | 0.0000 | 0.0000 |
| CP | ENST00000264613 | 1.168 | -0.3037 | 27.227 | 0.0000 | 0.0000 |
| TNFRSF11B | ENST00000297350 | 1.167 | 2.4486 | 35.359 | 0.0000 | 0.0000 |
| PLIN1 | ENST00000560330 | 1.166 | 0.5525 | 58.699 | 0.0000 | 0.0000 |
| FGL2 | ENST00000248598 | 1.159 | 3.5594 | 28.418 | 0.0000 | 0.0000 |
| THSD4 | ENST00000261862 | 1.155 | 3.0478 | 38.126 | 0.0000 | 0.0000 |
| PHLDB2 | ENST00000481953 | 1.154 | 0.1905 | 32.039 | 0.0000 | 0.0000 |
| THSD4 | ENST00000355327 | 1.150 | 3.1585 | 37.826 | 0.0000 | 0.0000 |
| TXNIP | ENST00000582401 | 1.142 | 7.2801 | 54.803 | 0.0000 | 0.0000 |
| VCAM1 | ENST00000370119 | 1.133 | 1.4273 | 22.377 | 0.0000 | 0.0000 |
| IL1RL1 | ENST00000311734 | 1.133 | 1.2369 | 15.743 | 0.0001 | 0.0004 |
| None | ENST00000406510 | 1.122 | 2.8575 | 35.130 | 0.0000 | 0.0000 |
| ZIC1 | ENST00000472523 | 1.119 | 2.6913 | 24.037 | 0.0000 | 0.0000 |
| PLIN1 | ENST00000430628 | 1.117 | 0.2383 | 56.023 | 0.0000 | 0.0000 |
| BNC2 | ENST00000380672 | 1.117 | 0.6369 | 30.048 | 0.0000 | 0.0000 |
| ZIC1 | ENST00000282928 | 1.109 | 2.4326 | 23.252 | 0.0000 | 0.0000 |
| OLFML2A | ENST00000373580 | 1.103 | 1.6315 | 36.271 | 0.0000 | 0.0000 |
| IL1RL1 | ENST00000404917 | 1.103 | 1.5676 | 14.986 | 0.0001 | 0.0006 |
| FOXD1 | ENST00000615637 | 1.102 | -0.2919 | 36.913 | 0.0000 | 0.0000 |
| SLC7A2 | ENST00000494857 | 1.101 | 5.2677 | 38.420 | 0.0000 | 0.0000 |
| SLC6A20 | ENST00000353278 | 1.099 | 1.2855 | 23.896 | 0.0000 | 0.0000 |
| LEPR | ENST00000616738 | 1.095 | 1.1400 | 34.119 | 0.0000 | 0.0000 |
| OLFML2A | ENST00000288815 | 1.094 | 1.5563 | 36.263 | 0.0000 | 0.0000 |
| IL1RL1 | ENST00000409584 | 1.084 | 1.0576 | 14.087 | 0.0002 | 0.0008 |
| CLEC4E | ENST00000299663 | 1.083 | -0.1953 | 26.982 | 0.0000 | 0.0000 |
| ANGPT2 | ENST00000523120 | 1.083 | 0.0113 | 26.545 | 0.0000 | 0.0000 |
| CYP1B1 | ENST00000494864 | 1.080 | 1.0422 | 17.790 | 0.0000 | 0.0002 |
| AC139426.1 | ENST00000568218 | -1.072 | -0.7013 | 6.267 | 0.0123 | 0.0282 |
| S100A8 | ENST00000477801 | 1.071 | 2.0185 | 17.906 | 0.0000 | 0.0002 |
| LEPR | ENST00000371060 | 1.069 | 1.1811 | 34.119 | 0.0000 | 0.0000 |
| SLC26A2 | ENST00000286298 | 1.068 | 3.7736 | 37.417 | 0.0000 | 0.0000 |
| LINC02192 | ENST00000570024 | -1.063 | 0.5860 | 31.730 | 0.0000 | 0.0000 |
| AL390755.1 | ENST00000614099 | 1.060 | 0.7093 | 33.202 | 0.0000 | 0.0000 |
| SLC6A20 | ENST00000456124 | 1.060 | 0.1308 | 24.563 | 0.0000 | 0.0000 |
| CYP1B1 | ENST00000614273 | 1.058 | -0.1752 | 17.525 | 0.0000 | 0.0002 |
| GJB2 | ENST00000382844 | 1.058 | 1.3173 | 19.354 | 0.0000 | 0.0001 |
| GALNT15 | ENST00000430410 | 1.058 | -0.4972 | 44.634 | 0.0000 | 0.0000 |
| KRT5 | ENST00000252242 | -1.056 | -0.4930 | 27.619 | 0.0000 | 0.0000 |
| GJB2 | ENST00000382848 | 1.056 | 1.3846 | 19.097 | 0.0000 | 0.0001 |
| SLC26A2 | ENST00000503336 | 1.054 | 3.7111 | 37.184 | 0.0000 | 0.0000 |
| COL24A1 | ENST00000370571 | -1.052 | 3.2431 | 51.189 | 0.0000 | 0.0000 |
| S100A8 | ENST00000368732 | 1.050 | 2.4470 | 17.775 | 0.0000 | 0.0002 |
| None | ENST00000380874 | 1.048 | 3.4435 | 42.039 | 0.0000 | 0.0000 |
| MMRN1 | ENST00000394980 | 1.042 | 1.2118 | 15.916 | 0.0001 | 0.0004 |
| ANGPT2 | ENST00000338312 | 1.042 | 0.1032 | 24.425 | 0.0000 | 0.0000 |
| CP | ENST00000481169 | 1.034 | 4.4195 | 26.592 | 0.0000 | 0.0000 |
| SERTM1 | ENST00000315190 | -1.030 | 3.4243 | 35.456 | 0.0000 | 0.0000 |
| FRMPD2 | ENST00000474573 | -1.028 | 0.8971 | 35.413 | 0.0000 | 0.0000 |
| CXCL8 | ENST00000307407 | -1.028 | 1.4163 | 12.903 | 0.0003 | 0.0014 |
| FAM20A | ENST00000226094 | 1.027 | 0.6134 | 17.291 | 0.0000 | 0.0002 |
| IL1RL1 | ENST00000233954 | 1.026 | 2.4945 | 12.780 | 0.0004 | 0.0015 |
| COL24A1 | ENST00000426639 | -1.025 | -0.4053 | 48.578 | 0.0000 | 0.0000 |
| PGR | ENST00000325455 | 1.024 | 1.5546 | 42.491 | 0.0000 | 0.0000 |
| SLC7A2 | ENST00000004531 | 1.022 | 4.8790 | 37.049 | 0.0000 | 0.0000 |
| CP | ENST00000473296 | 1.022 | -0.5519 | 24.944 | 0.0000 | 0.0000 |
| IL1RL1 | ENST00000463990 | 1.020 | -0.5689 | 12.798 | 0.0003 | 0.0014 |
| AL390755.1 | ENST00000617298 | 1.018 | 0.5406 | 30.927 | 0.0000 | 0.0000 |
| KLF4 | ENST00000493306 | 1.017 | 0.2475 | 34.246 | 0.0000 | 0.0000 |
| IL1RL1 | ENST00000427077 | 1.015 | -0.7783 | 13.490 | 0.0002 | 0.0011 |
| EMP1 | ENST00000542289 | 1.014 | 1.5059 | 26.247 | 0.0000 | 0.0000 |
| LIPH | ENST00000296252 | -1.014 | 0.1836 | 71.546 | 0.0000 | 0.0000 |
| LPAR4 | ENST00000435339 | 1.014 | -0.4470 | 31.699 | 0.0000 | 0.0000 |
| EMP1 | ENST00000431267 | 1.013 | 0.3996 | 26.999 | 0.0000 | 0.0000 |
| AP001993.1 | ENST00000527317 | -1.013 | -0.2069 | 34.277 | 0.0000 | 0.0000 |
| LINC01007 | ENST00000437900 | -1.013 | 1.0189 | 31.519 | 0.0000 | 0.0000 |
| LINC01007 | ENST00000434537 | -1.013 | 1.0189 | 31.519 | 0.0000 | 0.0000 |
| None | ENST00000431497 | -1.011 | 0.8876 | 37.144 | 0.0000 | 0.0000 |
| DLX6-AS1 | ENST00000452769 | -1.009 | 0.9233 | 37.165 | 0.0000 | 0.0000 |
| IL18R1 | ENST00000410040 | 1.008 | 0.1367 | 24.146 | 0.0000 | 0.0000 |
| GIMAP6 | ENST00000493969 | 1.008 | -0.5940 | 42.095 | 0.0000 | 0.0000 |
| DLX6-AS1 | ENST00000430027 | -1.006 | 3.3202 | 37.706 | 0.0000 | 0.0000 |
We get normalized data with cpm(dgeObj, normalized.lib.sizes = TRUE) for the top AD vs Control transcripts.
all_samples_ad_vs_psp_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% ad_vs_psp_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(ad_cols)) %>%
`rownames<-`(AD_vs_PSP_GeneIDs$GeneID %>% make.unique)
write.xlsx(all_samples_ad_vs_control_tr, "./results_transcr/transcriptsdata.xlsx",
sheetName = "all_samples_ad_vs_control_tr", append = TRUE)
write.xlsx(all_samples_ad_vs_psp_tr, "./results_transcr/transcriptsdata.xlsx",
sheetName = "all_samples_ad_vs_psp_tr", append = TRUE)
write.xlsx(all_samples_psp_vs_control_tr, "./results_transcr/transcriptsdata.xlsx",
sheetName = "all_samples_psp_vs_control_tr", append = TRUE)
write.xlsx(ad_vs_control_top_tr, "./results_transcr/transcriptsdata.xlsx",
sheetName = "ad_vs_control_top_tr", append = TRUE)
write.xlsx(psp_vs_control_top_tr, "./results_transcr/transcriptsdata.xlsx",
sheetName = "psp_vs_control_top_tr", append = TRUE)
write.xlsx(ad_vs_psp_top_tr, "./results_transcr/transcriptsdata.xlsx",
sheetName = "ad_vs_psp_top_tr", append = TRUE)
Prepare an object for clustering (gather AD and Control samples in one data frame).
all_samples_ad_and_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(c(ad_cols, control_cols))) %>%
`colnames<-`(c(make.unique(rep("AD", length(ad_cols))),
make.unique(rep("Control", length(control_cols))))) %>%
`rownames<-`(AD_vs_Control_GeneIDs$GeneID %>% make.unique)
Transform to matrix
## data.matrix() leaves numeric data as is.
ad_tr_matrix <- data.matrix(all_samples_ad_and_control_tr)
## rownames(ad_tr_matrix) <- dat_filt_ad$PIDshort
## ad_tr_matrix <- ad_tr_matrix[,-1]
ad_tr_scaled <- ad_tr_matrix %>% t %>% scale %>% t
Euclidean distance between experiments: AD & Control
ad_dist_sampl <- ad_tr_scaled %>% t %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Euclidean distance between proteins: AD & Control
ad_dist_tr <- ad_tr_scaled %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Hierarchical Ward’s clustering
ad_hclust_sampl <- hclust(ad_dist_sampl, method = "ward.D2", members = NULL)
ad_hclust_tr <- hclust(ad_dist_tr, method = "ward.D2", members = NULL)
png("./figs_transcr/hclust_ad.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
ad_hclust_sampl %>% plot(cex = 0.4, main = "Conditions. Number of samples: AD = 82, Control = 31")
ad_hclust_tr %>% plot(cex = 0.6, main = "Proteins. Number of samples: AD = 82, Control = 31")
dev.off()
## png
## 2
# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)
png("./figs_transcr/heatmap_ad.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
ad_tr_scaled %>%
# Plot heatmap
gplots::heatmap.2(., # Tidy, normalised data
Colv=as.dendrogram(ad_hclust_sampl), # Experiments clusters in cols
Rowv=as.dendrogram(ad_hclust_tr), # Protein clusters in rows
revC=TRUE, # Flip plot to match pheatmap
density.info="histogram", # Plot histogram of data and colour key
trace="none", # Turn of trace lines from heat map
col = my_palette, # Use my colour scheme
cexRow=0.3, cexCol=0.2, # Amend row and column label fonts
xlab = paste0("Number of samples: AD = ", length(ad_cols), ", Control = ", length(control_cols))
)
dev.off()
## png
## 2
Prepare an object for clustering (gather AD and Control samples in one data frame).
all_samples_psp_and_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% psp_vs_control_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(c(psp_cols, control_cols))) %>%
`colnames<-`(c(make.unique(rep("PSP", length(ad_cols))),
make.unique(rep("Control", length(control_cols))))) %>%
`rownames<-`(PSP_vs_Control_GeneIDs$GeneID %>% make.unique)
Transform to matrix
## data.matrix() leaves numeric data as is.
psp_tr_matrix <- data.matrix(all_samples_psp_and_control_tr)
psp_tr_scaled <- psp_tr_matrix %>% t %>% scale %>% t
Euclidean distance between experiments: PSP & Control
psp_dist_sampl <- psp_tr_scaled %>% t %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Euclidean distance between proteins: PSP & Control
psp_dist_tr <- psp_tr_scaled %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Hierarchical Ward’s clustering
psp_hclust_sampl <- hclust(psp_dist_sampl, method = "ward.D2", members = NULL)
psp_hclust_tr <- hclust(psp_dist_tr, method = "ward.D2", members = NULL)
png("./figs_transcr/hclust_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
psp_hclust_sampl %>% plot(cex = 0.4, main = "Conditions. Number of samples: PSP = 84, Control = 31")
psp_hclust_tr %>% plot(cex = 0.6, main = "Proteins. Number of samples: PSP = 84, Control = 31")
dev.off()
## png
## 2
# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)
png("./figs_transcr/heatmap_psp.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
psp_tr_scaled %>%
# Plot heatmap
gplots::heatmap.2(., # Tidy, normalised data
Colv=as.dendrogram(psp_hclust_sampl), # Experiments clusters in cols
Rowv=as.dendrogram(psp_hclust_tr), # Protein clusters in rows
revC=TRUE, # Flip plot to match pheatmap
density.info="histogram", # Plot histogram of data and colour key
trace="none", # Turn of trace lines from heat map
col = my_palette, # Use my colour scheme
cexRow=0.3, cexCol=0.2, # Amend row and column label fonts
xlab = paste0("Number of samples: PSP = ", length(psp_cols), ", Control = ", length(control_cols))
)
dev.off()
## png
## 2
Prepare an object for clustering (gather AD and Control samples in one data frame).
all_samples_ad_and_psp_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% ad_vs_psp_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(c(ad_cols, psp_cols))) %>%
`colnames<-`(c(make.unique(rep("AD", length(ad_cols))),
make.unique(rep("PSP", length(control_cols))))) %>%
`rownames<-`(AD_vs_PSP_GeneIDs$GeneID %>% make.unique)
Transform to matrix
## data.matrix() leaves numeric data as is.
ad_psp_tr_matrix <- data.matrix(all_samples_ad_and_psp_tr)
ad_psp_tr_scaled <- ad_psp_tr_matrix %>% t %>% scale %>% t
Euclidean distance between experiments: AD & Control
ad_psp_dist_sampl <- ad_psp_tr_scaled %>% t %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Euclidean distance between proteins: AD & PSP
ad_psp_dist_tr <- ad_psp_tr_scaled %>%
dist(., method = "euclidean", diag = FALSE, upper = FALSE)
Hierarchical Ward’s clustering
ad_psp_hclust_sampl <- hclust(ad_psp_dist_sampl, method = "ward.D2", members = NULL)
ad_psp_hclust_tr <- hclust(ad_psp_dist_tr, method = "ward.D2", members = NULL)
png("./figs_transcr/hclust_ad_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
ad_psp_hclust_sampl %>% plot(cex = 0.4, main = "Conditions. Number of samples: AD = 82, PSP = 84")
ad_psp_hclust_tr %>% plot(cex = 0.6, main = "Proteins. Number of samples: AD = 82, PSP = 84")
dev.off()
## png
## 2
# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)
png("./figs_transcr/heatmap_ad_psp.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
ad_psp_tr_scaled %>%
# Plot heatmap
gplots::heatmap.2(., # Tidy, normalised data
Colv=as.dendrogram(ad_psp_hclust_sampl), # Experiments clusters in cols
Rowv=as.dendrogram(ad_psp_hclust_tr), # Protein clusters in rows
revC=TRUE, # Flip plot to match pheatmap
density.info="histogram", # Plot histogram of data and colour key
trace="none", # Turn of trace lines from heat map
col = my_palette, # Use my colour scheme
cexRow=0.3, cexCol=0.2, # Amend row and column label fonts
xlab = paste0("Number of samples: AD = ", length(ad_cols), ", PSP = ", length(psp_cols))
)
dev.off()
## png
## 2
## venn_list_tr <- list("AD vs Control" = topTags(lrt.AvsC, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.AvsC)) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "PSP vs Control" = topTags(lrt.PvsC, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.PvsC)) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "AD vs PSP" = topTags(lrt.AvsP, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.AvsP)) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name))
## saveRDS(venn_list_tr, "./results_transcr/venn_list_tr.rds")
venn_list_tr <- readRDS("./results_transcr/venn_list_tr.rds")
# Prevent the output of a log file
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
## NULL
# Create a venn diagram object
tr_venn <- venn.diagram(venn_list_tr,
NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 0.8,
fontfamily = "sans",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 0.8,
cat.fontfamily = "sans",
margin = 0.2,
main = "Pairwise significantly DE transctipts identified in the experiment",
main.pos = c(0.5, 1.05),
main.fontfamily = "sans",
sub = "Number of samples: AD = 82, PSP = 84, Control = 31",
sub.pos = c(0.5, 0.92),
sub.cex = 0.8,
sub.fontfamily = "sans",
print.mode = c("raw","percent") # Show both numbers and percent
)
# Plot the venn diagram using the gridExtra package
png("./figs_transcr/venn.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = tr_venn))
dev.off()
## png
## 2
## venn_list_tr_2 <- list(
## "AD-C +" = topTags(lrt.AvsC, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.AvsC)) %>%
## data.frame() %>%
## filter(logFC > 0) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "AD-C -" = topTags(lrt.AvsC, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.AvsC)) %>%
## data.frame() %>%
## filter(logFC < 0) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "P-C +" = topTags(lrt.PvsC, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.PvsC)) %>%
## data.frame() %>%
## filter(logFC > 0) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "P-C -" = topTags(lrt.PvsC, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.PvsC)) %>%
## data.frame() %>%
## filter(logFC < 0) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "AD-PSP +" = topTags(lrt.AvsP, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.AvsP)) %>%
## data.frame() %>%
## filter(logFC > 0) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name),
## "AD-PSP -" = topTags(lrt.AvsP, sort.by = "logFC",
## p.value = 0.05, n = nrow(lrt.AvsP)) %>%
## data.frame() %>%
## filter(logFC < 0) %>%
## rownames() %>%
## noquote() %>%
## gconvert(organism="hsapiens", target="ENSG") %>%
## pull(name)
## )
## saveRDS(venn_list_tr_2, "./results_transcr/venn_list_tr_2.rds")
venn_list_tr_2 <- readRDS("./results_transcr/venn_list_tr_2.rds")
tr_venn_up <- venn.diagram(venn_list_tr_2[c(1,3,5)],NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 0.8,
fontfamily = "sans",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 0.8,
cat.fontfamily = "sans",
margin = 0.2,
main = "Pairwise significantly upregulated transcripts (adj.p<0.05) identified in the experiment",
main.pos = c(0.5, 1.05),
main.fontfamily = "sans",
sub = "Number of samples: AD = 82, PSP = 84, Control = 31",
sub.pos = c(0.5, 0.92),
sub.cex = 0.8,
sub.fontfamily = "sans",
print.mode = c("raw","percent") # Show both numbers and percent
)
# Plot the venn diagram using the gridExtra package
png("./figs_transcr/venn_up.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = tr_venn_up))
dev.off()
## png
## 2
tr_venn_down <- venn.diagram(venn_list_tr_2[c(2,4,6)],NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 0.8,
fontfamily = "sans",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 0.8,
cat.fontfamily = "sans",
margin = 0.2,
main = "Pairwise significantly downregulated transcripts (adj.p<0.05) identified in the experiment",
main.pos = c(0.5, 1.05),
main.fontfamily = "sans",
sub = "Number of samples: AD = 82, PSP = 84, Control = 31",
sub.pos = c(0.5, 0.92),
sub.cex = 0.8,
sub.fontfamily = "sans",
print.mode = c("raw","percent") # Show both numbers and percent
)
# Plot the venn diagram using the gridExtra package
png("./figs_transcr/venn_down.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = tr_venn_down))
dev.off()
## png
## 2
EnhancedVolcano(lrt.AvsC %>% data.frame,
x = "logFC",
y = "PValue",
lab = gene_ids_tr$GeneID,
title = 'AD versus Control',
subtitle = "Number of samples: AD = 82, PSP = 84, Control = 31",
pCutoff = 0.05,
FCcutoff = 2,
pointSize = 0.5,
axisLabSize = 10,
labSize = 2.0)
ggsave("./figs_transcr/ad_vs_control_volcano.png")
## Saving 7 x 5 in image
EnhancedVolcano(lrt.PvsC %>% data.frame,
x = "logFC",
y = "PValue",
lab = gene_ids_tr$GeneID,
title = 'PSP versus Control',
subtitle = "Number of samples: AD = 82, PSP = 84, Control = 31",
pCutoff = 0.05,
FCcutoff = 1.9,
pointSize = 0.5,
axisLabSize = 10,
labSize = 2.0)
ggsave("./figs_transcr/psp_vs_control_volcano.png")
## Saving 7 x 5 in image
EnhancedVolcano(lrt.AvsP %>% data.frame,
x = "logFC",
y = "PValue",
lab = gene_ids_tr$GeneID,
title = 'AD versus PSP',
subtitle = "Number of samples: AD = 82, PSP = 84, Control = 31",
pCutoff = 0.05,
FCcutoff = 1,
pointSize = 0.5,
axisLabSize = 10,
labSize = 2.0)
ggsave("./figs_transcr/ad_vs_psp_volcano.png")
## Saving 7 x 5 in image
cpm(dgeObj, log = TRUE, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(c(ad_cols, control_cols, psp_cols))) %>%
`rownames<-`(AD_vs_Control_GeneIDs$GeneID %>% make.unique) %>%
t %>% data.frame %>%
dplyr::select(1:25) %>%
mutate(condition = c(rep("AD", length(ad_cols)),
rep("Control", length(control_cols)),
rep("PSP", length(psp_cols)))) %>%
filter(condition != "PSP") %>%
## For ggplot, you need to convert the data from wide format to long format.
pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
## gglot uses the "condition" column to group boxplots: fill=condition in aes().
ggplot(aes(protein, level, fill=condition)) +
geom_boxplot(outlier.size = 0.5) +
scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
xlab("") +
ylab("log2 expression level") +
ggtitle("AD vs Control top 25 significant genes. Number of samples: AD = 82, Control = 31") +
theme_light() +
theme(plot.title = element_text(size = 16, face = "bold"))
ggsave("./figs_transcr/boxplot_ad_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in")
cpm(dgeObj, log = TRUE, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% psp_vs_control_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(c(ad_cols, control_cols, psp_cols))) %>%
`rownames<-`(PSP_vs_Control_GeneIDs$GeneID %>% make.unique) %>%
t %>% data.frame %>%
dplyr::select(1:25) %>%
mutate(condition = c(rep("AD", length(ad_cols)),
rep("Control", length(control_cols)),
rep("PSP", length(psp_cols)))) %>%
filter(condition != "AD") %>%
## For ggplot, you need to convert the data from wide format to long format.
pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
## gglot uses the "condition" column to group boxplots: fill=condition in aes().
mutate(conditions = factor(condition, levels = c("PSP", "Control"))) %>%
ggplot(aes(protein, level, fill=conditions)) +
geom_boxplot(outlier.size = 0.5) +
scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
xlab("") +
ylab("log2 expression level") +
ggtitle("PSP vs Control top 25 significant genes. Number of samples: PSP = 84, Control = 31") +
theme_light() +
theme(plot.title = element_text(size = 16, face = "bold"))
ggsave("./figs_transcr/boxplot_psp_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in")
cpm(dgeObj, log = TRUE, normalized.lib.sizes = TRUE) %>%
data.frame() %>%
filter(rownames(.) %in% ad_vs_psp_top_tr$TranscriptIDs) %>%
dplyr::select(all_of(c(ad_cols, control_cols, psp_cols))) %>%
`rownames<-`(AD_vs_PSP_GeneIDs$GeneID %>% make.unique) %>%
t %>% data.frame %>%
dplyr::select(1:25) %>%
mutate(condition = c(rep("AD", length(ad_cols)),
rep("Control", length(control_cols)),
rep("PSP", length(psp_cols)))) %>%
filter(condition != "Control") %>%
## For ggplot, you need to convert the data from wide format to long format.
pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
## gglot uses the "condition" column to group boxplots: fill=condition in aes().
ggplot(aes(protein, level, fill=condition)) +
geom_boxplot(outlier.size = 0.5) +
scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
xlab("") +
ylab("log2 expression level") +
ggtitle("AD vs PSP top 25 significant genes. Number of samples: AD = 82, PSP = 84") +
theme_light() +
theme(plot.title = element_text(size = 16, face = "bold"))
ggsave("./figs_transcr/boxplot_ad_vs_psp.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in")
We use results obtained earlier.
From: A simple function to format the correlation matrix
# ++++++++++++++++++++++++++++
# flattenCorrMatrix
# ++++++++++++++++++++++++++++
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
We select from the normalized data and from the normalized and log2-transformed protein data only those samples that are also in the RNA data.
ccol <- c(1:31) # Control columns in 'data_all'
acol <- c(32:115) # AD columns in 'data_all'
pcol <- c(116:200) # PSP columns in 'data_all'
## We remove 3 samples that are absent in the transcriptomics.
acol <- acol[-c(29, 38)]
pcol <- pcol[-4]
## AD proteins in AD samples
ad_pr_top <- data_all %>%
`rownames<-`(.$PIDshort %>% make.unique) %>%
filter(rownames(.) %in% rownames(AvsC_top)) %>%
## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
dplyr::select(all_of(acol))
## AD proteins in PSP samples
psp_pr_top <- data_all %>%
`rownames<-`(.$PIDshort %>% make.unique) %>%
filter(rownames(.) %in% rownames(AvsC_top)) %>%
## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
dplyr::select(all_of(pcol))
## AD proteins in Control samples
control_pr_top <- data_all %>%
`rownames<-`(.$PIDshort %>% make.unique) %>%
filter(rownames(.) %in% rownames(AvsC_top)) %>%
## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
dplyr::select(all_of(ccol))
## data_all contains log2 transformed data
ad_pr_top_in_all <- data_all %>%
`rownames<-`(.$PIDshort %>% make.unique) %>%
filter(rownames(.) %in% rownames(AvsC_top)) %>%
## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
dplyr::select(all_of(c(acol, pcol, ccol)))
ad_pr_top_cor_all <- rcorr(t(as.matrix(ad_pr_top_in_all)), t(as.matrix(ad_pr_top_in_all)),
type = "spearman")
png("./figs_cor/prot_ad_in_all.png", width = 2000, height = 2000, res = 300)
corrplot(ad_pr_top_cor_all$r[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
p.mat = ad_pr_top_cor_all$P[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
sig.level = 0.05, insig = "blank",
## type="upper", order="hclust",
col = colorRampPalette(c("darkblue", "white", "green"))(200),
tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
In the above plot, correlations with p-value > 0.05 are shown as blank as they are considered insignificant.
png("./figs_cor/prot_ad_in_all_numb.png", width = 2000, height = 2000, res = 300)
corrplot(ad_pr_top_cor_all$r[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
p.mat = ad_pr_top_cor_all$P[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
sig.level = 0.05, insig = "blank",
## type="upper", order="hclust",
method = "number", number.cex = 0.2,
col = colorRampPalette(c("darkblue", "white", "green"))(200),
tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
Save correlation coefficients.
ad_top_r_all <- ad_pr_top_cor_all$r[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)] %>%
data.frame() %>%
filter(rownames(.) %in% rownames(AvsC_top))
## filter(rownames(.) %in% ad_vs_control_prot$PIDshort)
write.csv(ad_top_r_all, "./results_cor/ad_prot_cor_coeffs_all.csv")
Save correlation coefficient’s alongside with p-values
write.csv(flattenCorrMatrix(cormat = ad_pr_top_cor_all$r[1:nrow(ad_pr_top),
1:nrow(ad_pr_top)],
pmat = ad_pr_top_cor_all$P[1:nrow(ad_pr_top),
1:nrow(ad_pr_top)]),
"./results_cor/ad_prot_cor_coeffs+p-values_all.csv")
transcripts_norm <- cpm(dgeObj, log = TRUE,
normalized.lib.sizes = TRUE) %>%
data.frame()
ad_all_tr_top <- transcripts_norm[c(ad_cols, psp_cols, control_cols)] %>%
filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
`rownames<-`(rownames(all_samples_ad_vs_control_tr))
ad_tr_top_cor_all <- rcorr(t(as.matrix(ad_all_tr_top)),
t(as.matrix(ad_all_tr_top)),
type = "spearman")
png("./figs_cor/transcr_ad_in_all.png", width = 2000, height = 2000, res = 300)
corrplot(ad_tr_top_cor_all$r[1:nrow(all_samples_ad_vs_control_tr),
1:nrow(all_samples_ad_vs_control_tr)],
p.mat = ad_tr_top_cor_all$P[1:nrow(all_samples_ad_vs_control_tr),
1:nrow(all_samples_ad_vs_control_tr)],
## type="upper", order="hclust",
sig.level = 0.05, insig = "blank",
col = colorRampPalette(c("darkblue", "white", "green"))(200),
tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
In the above plot, correlations with p-value > 0.05 are shown as blank as they are considered insignificant.
png("./figs_cor/transcr_ad_in_all_numb.png", width = 2000, height = 2000, res = 300)
corrplot(ad_tr_top_cor_all$r[1:nrow(all_samples_ad_vs_control_tr),
1:nrow(all_samples_ad_vs_control_tr)],
p.mat = ad_tr_top_cor_all$P[1:nrow(all_samples_ad_vs_control_tr),
1:nrow(all_samples_ad_vs_control_tr)],
sig.level = 0.05, insig = "blank",
## type="upper", order="hclust",
method = "number", number.cex = 0.2,
col = colorRampPalette(c("darkblue", "white", "green"))(200),
tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
Save correlation coefficients.
write.csv(ad_tr_top_cor_all$r[1:nrow(all_samples_ad_vs_control_tr),
1:nrow(all_samples_ad_vs_control_tr)],
"./results_cor/ad_transcripts_cor_r_all.csv")
## knitr::kable(ad_tr_top_cor$r[1:nrow(all_samples_ad_vs_control_tr), 1:nrow(all_samples_ad_vs_control_tr)], caption = "Correlation coefficients for top 25 AD genes")
Save correlation coefficient’s p-values
write.csv(flattenCorrMatrix(cormat = ad_tr_top_cor_all$r,
pmat = ad_tr_top_cor_all$P),
"./results_cor/ad_transcripts_cor_coeffs+p-values_all.csv")
## knitr::kable(ad25P, caption = "Correlation coefficient's p-values for top 25 AD proteins")
i <- nrow(ad_pr_top_in_all)
j <- nrow(ad_all_tr_top)
ad_pr_tr_cor_all <- rcorr(t(as.matrix(ad_all_tr_top)),
t(as.matrix(ad_pr_top_in_all)),
type = "spearman")
## write_rds(list(A=A, B=B), "./ab.rds")
## ab <- readRDS("./ab.rds")
## rcorr(t(T).t(P))$r[number_of_transcripts+1 : number_of_transcripts+number_of_proteins,
## 1: number_of_transcripts]
png("./figs_cor/trpr_ad_in_all.png", width = 2000, height = 3300, res = 300)
corrplot(ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
p.mat = ad_pr_tr_cor_all$P[(j+1) : (i+j), 1:j],
## type="upper", order="hclust",
sig.level = 0.05, insig = "blank",
col = colorRampPalette(c("darkblue", "white", "green"))(200),
tl.col = "black", tl.cex = 0.3, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
In the above plot, correlations with p-value > 0.05 are shown as blank as they are considered insignificant.
png("./figs_cor/trpr_ad_in_all_numb.png", width = 2000, height = 3300, res = 300)
corrplot(ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
p.mat = ad_pr_tr_cor_all$P[(j+1) : (i+j), 1:j],
## type="upper", order="hclust",
sig.level = 0.05, insig = "blank",
method = "number", number.cex = 0.2,
col = colorRampPalette(c("darkblue", "white", "green"))(200),
tl.col = "black", tl.cex = 0.3, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off()
## png
## 2
Save correlation coefficients.
write.csv(ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
"./results_cor/ad_prot_transcr_cor_r_all.csv")
Save correlation coefficient’s p-values
write.csv(flattenCorrMatrix(cormat = ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
pmat = ad_pr_tr_cor_all$P[(j+1) : (i+j), 1:j]),
"./results_cor/ad_prot_transcr_cor_coeffs+p-values_all.csv")